diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2010-02-16 09:30:23 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2010-02-16 09:30:23 +0000 |
commit | f25ddd991a5601d0101602c4c263a58c7af4b8a2 (patch) | |
tree | 4cfca640904d1896e25032757a61f8959c066919 /lib/Target/X86 | |
parent | 3fd58f91dd318518f7daa4ba64c0aaf31799d89b (diff) | |
download | FreeBSD-src-f25ddd991a5601d0101602c4c263a58c7af4b8a2.zip FreeBSD-src-f25ddd991a5601d0101602c4c263a58c7af4b8a2.tar.gz |
Update LLVM to r96341.
Diffstat (limited to 'lib/Target/X86')
55 files changed, 2797 insertions, 1859 deletions
diff --git a/lib/Target/X86/AsmParser/Makefile b/lib/Target/X86/AsmParser/Makefile index 288b985..25fb0a2 100644 --- a/lib/Target/X86/AsmParser/Makefile +++ b/lib/Target/X86/AsmParser/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMX86AsmParser -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' x86 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp index 1a62044..a58f58e 100644 --- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -7,8 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Target/TargetAsmLexer.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "X86.h" @@ -19,18 +22,119 @@ namespace { class X86AsmLexer : public TargetAsmLexer { const MCAsmInfo &AsmInfo; + + bool tentativeIsValid; + AsmToken tentativeToken; + + const AsmToken &lexTentative() { + tentativeToken = getLexer()->Lex(); + tentativeIsValid = true; + return tentativeToken; + } + + const AsmToken &lexDefinite() { + if(tentativeIsValid) { + tentativeIsValid = false; + return tentativeToken; + } + else { + return getLexer()->Lex(); + } + } + + AsmToken LexTokenATT(); + AsmToken LexTokenIntel(); protected: - AsmToken LexToken(); + AsmToken LexToken() { + if (!Lexer) { + SetError(SMLoc(), "No MCAsmLexer installed"); + return AsmToken(AsmToken::Error, "", 0); + } + + switch (AsmInfo.getAssemblerDialect()) { + default: + SetError(SMLoc(), "Unhandled dialect"); + return AsmToken(AsmToken::Error, "", 0); + case 0: + return LexTokenATT(); + case 1: + return LexTokenIntel(); + } + 
} public: X86AsmLexer(const Target &T, const MCAsmInfo &MAI) - : TargetAsmLexer(T), AsmInfo(MAI) { + : TargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) { } }; } -AsmToken X86AsmLexer::LexToken() { - return AsmToken(AsmToken::Error, "", 0); +static unsigned MatchRegisterName(StringRef Name); + +AsmToken X86AsmLexer::LexTokenATT() { + const AsmToken lexedToken = lexDefinite(); + + switch (lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Percent: + { + const AsmToken &nextToken = lexTentative(); + if (nextToken.getKind() == AsmToken::Identifier) { + unsigned regID = MatchRegisterName(nextToken.getString()); + + if (regID) { + lexDefinite(); + + StringRef regStr(lexedToken.getString().data(), + lexedToken.getString().size() + + nextToken.getString().size()); + + return AsmToken(AsmToken::Register, + regStr, + static_cast<int64_t>(regID)); + } + else { + return AsmToken(lexedToken); + } + } + else { + return AsmToken(lexedToken); + } + } + } +} + +AsmToken X86AsmLexer::LexTokenIntel() { + const AsmToken &lexedToken = lexDefinite(); + + switch(lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Identifier: + { + std::string upperCase = lexedToken.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + StringRef lowerRef(lowerCase); + + unsigned regID = MatchRegisterName(lowerRef); + + if (regID) { + return AsmToken(AsmToken::Register, + lexedToken.getString(), + static_cast<int64_t>(regID)); + } + else { + return AsmToken(lexedToken); + } + } + } } extern "C" void LLVMInitializeX86AsmLexer() { @@ -38,6 +142,6 @@ extern "C" void LLVMInitializeX86AsmLexer() { RegisterAsmLexer<X86AsmLexer> Y(TheX86_64Target); } -//#define REGISTERS_ONLY -//#include "../X86GenAsmMatcher.inc" 
-//#undef REGISTERS_ONLY +#define REGISTERS_ONLY +#include "X86GenAsmMatcher.inc" +#undef REGISTERS_ONLY diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 7a9218e..84d7bb7 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -10,6 +10,7 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" @@ -67,7 +68,7 @@ public: /// @name Auto-generated Match Functions /// { -static unsigned MatchRegisterName(const StringRef &Name); +static unsigned MatchRegisterName(StringRef Name); /// } @@ -172,8 +173,25 @@ struct X86Operand : public MCParsedAsmOperand { bool isMem() const { return Kind == Memory; } + bool isAbsMem() const { + return Kind == Memory && !getMemSegReg() && !getMemBaseReg() && + !getMemIndexReg() && getMemScale() == 1; + } + + bool isNoSegMem() const { + return Kind == Memory && !getMemSegReg(); + } + bool isReg() const { return Kind == Register; } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); @@ -181,26 +199,35 @@ struct X86Operand : public MCParsedAsmOperand { void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addImmSExt8Operands(MCInst &Inst, unsigned N) const { // FIXME: Support user customization of the render method. 
assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addMemOperands(MCInst &Inst, unsigned N) const { - assert((N == 4 || N == 5) && "Invalid number of operands!"); - + assert((N == 5) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); Inst.addOperand(MCOperand::CreateImm(getMemScale())); Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); + addExpr(Inst, getMemDisp()); + Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); + } + + void addAbsMemOperands(MCInst &Inst, unsigned N) const { + assert((N == 1) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + } - // FIXME: What a hack. - if (N == 5) - Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); + void addNoSegMemOperands(MCInst &Inst, unsigned N) const { + assert((N == 4) && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); + Inst.addOperand(MCOperand::CreateImm(getMemScale())); + Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); + addExpr(Inst, getMemDisp()); } static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { @@ -222,10 +249,24 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } + /// Create an absolute memory operand. + static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, + SMLoc EndLoc) { + X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); + Res->Mem.SegReg = 0; + Res->Mem.Disp = Disp; + Res->Mem.BaseReg = 0; + Res->Mem.IndexReg = 0; + Res->Mem.Scale = 1; + return Res; + } + + /// Create a generalized memory operand. static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) { - // We should never just have a displacement, that would be an immediate. + // We should never just have a displacement, that should be parsed as an + // absolute memory operand. 
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); // The scale should always be one of {1,2,4,8}. @@ -259,6 +300,42 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. RegNo = MatchRegisterName(Tok.getString()); + + // Parse %st(1) and "%st" as "%st(0)" + if (RegNo == 0 && Tok.getString() == "st") { + RegNo = X86::ST0; + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat 'st' + + // Check to see if we have '(4)' after %st. + if (getLexer().isNot(AsmToken::LParen)) + return false; + // Lex the paren. + getParser().Lex(); + + const AsmToken &IntTok = Parser.getTok(); + if (IntTok.isNot(AsmToken::Integer)) + return Error(IntTok.getLoc(), "expected stack index"); + switch (IntTok.getIntVal()) { + case 0: RegNo = X86::ST0; break; + case 1: RegNo = X86::ST1; break; + case 2: RegNo = X86::ST2; break; + case 3: RegNo = X86::ST3; break; + case 4: RegNo = X86::ST4; break; + case 5: RegNo = X86::ST5; break; + case 6: RegNo = X86::ST6; break; + case 7: RegNo = X86::ST7; break; + default: return Error(IntTok.getLoc(), "invalid stack index"); + } + + if (getParser().Lex().isNot(AsmToken::RParen)) + return Error(Parser.getTok().getLoc(), "expected ')'"); + + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat ')' + return false; + } + if (RegNo == 0) return Error(Tok.getLoc(), "invalid register name"); @@ -300,8 +377,8 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The - // only way to do this without lookahead is to eat the ( and see what is after - // it. + // only way to do this without lookahead is to eat the '(' and see what is + // after it. 
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; @@ -312,7 +389,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { if (getLexer().isNot(AsmToken::LParen)) { // Unless we have a segment register, treat this as an immediate. if (SegReg == 0) - return X86Operand::CreateImm(Disp, MemStart, ExprEnd); + return X86Operand::CreateMem(Disp, MemStart, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } @@ -339,7 +416,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { if (getLexer().isNot(AsmToken::LParen)) { // Unless we have a segment register, treat this as an immediate. if (SegReg == 0) - return X86Operand::CreateImm(Disp, LParenLoc, ExprEnd); + return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd); return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd); } @@ -424,8 +501,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { bool X86ATTAsmParser:: ParseInstruction(const StringRef &Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - - Operands.push_back(X86Operand::CreateToken(Name, NameLoc)); + // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to + // represent alternative syntaxes in the .td file, without requiring + // instruction duplication. 
+ StringRef PatchedName = StringSwitch<StringRef>(Name) + .Case("sal", "shl") + .Case("salb", "shlb") + .Case("sall", "shll") + .Case("salq", "shlq") + .Case("salw", "shlw") + .Case("repe", "rep") + .Case("repz", "rep") + .Case("repnz", "repne") + .Default(Name); + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (getLexer().isNot(AsmToken::EndOfStatement)) { diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile index 326a22f..2368761 100644 --- a/lib/Target/X86/AsmPrinter/Makefile +++ b/lib/Target/X86/AsmPrinter/Makefile @@ -8,7 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../../.. LIBRARYNAME = LLVMX86AsmPrinter -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' x86 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 804dbb9..1a35a49 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -18,17 +18,22 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "X86GenInstrNames.inc" using namespace llvm; // Include the auto-generated portion of the assembly writer. 
#define MachineInstr MCInst -#define NO_ASM_WRITER_BOILERPLATE +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter.inc" #undef MachineInstr void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { @@ -66,6 +71,10 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) { O << '%' << getRegisterName(Op.getReg()); } else if (Op.isImm()) { O << '$' << Op.getImm(); + + if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) + *CommentStream << format("imm = 0x%X\n", Op.getImm()); + } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); O << '$' << *Op.getExpr(); diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 3180618..d109a07 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -26,11 +26,12 @@ public: virtual void printInst(const MCInst *MI); - + virtual StringRef getOpcodeName(unsigned Opcode) const; + // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); - + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo); void printMemReference(const MCInst *MI, unsigned Op); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 2ffa18f..8cab24c 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -8,12 +8,10 @@ //===----------------------------------------------------------------------===// // // This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to AT&T format assembly -// language. 
This printer is the output mechanism used by `llc'. +// of machine-dependent LLVM code to X86 machine code. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "X86AsmPrinter.h" #include "X86ATTInstPrinter.h" #include "X86IntelInstPrinter.h" @@ -29,169 +27,70 @@ #include "llvm/Type.h" #include "llvm/Assembly/Writer.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(EmittedInsts, "Number of machine instrs printed"); - //===----------------------------------------------------------------------===// // Primitive Helper Functions. //===----------------------------------------------------------------------===// -void X86AsmPrinter::printMCInst(const MCInst *MI) { - if (MAI->getAssemblerDialect() == 0) - X86ATTInstPrinter(O, *MAI).printInstruction(MI); - else - X86IntelInstPrinter(O, *MAI).printInstruction(MI); -} - void X86AsmPrinter::PrintPICBaseSymbol() const { - // FIXME: Gross const cast hack. 
- X86AsmPrinter *AP = const_cast<X86AsmPrinter*>(this); - O << *X86MCInstLower(OutContext, 0, *AP).GetPICBaseSymbol(); + const TargetLowering *TLI = TM.getTargetLowering(); + O << *static_cast<const X86TargetLowering*>(TLI)->getPICBaseSymbol(MF, + OutContext); } -void X86AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { - unsigned FnAlign = MF.getAlignment(); - const Function *F = MF.getFunction(); +MCSymbol *X86AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, GV, false); + MCSymbol *Symb = OutContext.GetOrCreateSymbol(NameStr.str()); if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = + X86COFFMachineModuleInfo &COFFMMI = MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(CurrentFnSym, OutContext, F, - *TM.getTargetData()); - } - - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); - EmitAlignment(FnAlign, F); - - switch (F->getLinkage()) { - default: llvm_unreachable("Unknown linkage type!"); - case Function::InternalLinkage: // Symbols default to internal. 
- case Function::PrivateLinkage: - break; - case Function::DLLExportLinkage: - case Function::ExternalLinkage: - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_Global); - break; - case Function::LinkerPrivateLinkage: - case Function::LinkOnceAnyLinkage: - case Function::LinkOnceODRLinkage: - case Function::WeakAnyLinkage: - case Function::WeakODRLinkage: - if (Subtarget->isTargetDarwin()) { - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_Global); - O << MAI->getWeakDefDirective() << *CurrentFnSym << '\n'; - } else if (Subtarget->isTargetCygMing()) { - OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_Global); - O << "\t.linkonce discard\n"; - } else { - O << "\t.weak\t" << *CurrentFnSym << '\n'; - } - break; - } + COFFMMI.DecorateCygMingName(Symb, OutContext, GV, *TM.getTargetData()); - printVisibility(CurrentFnSym, F->getVisibility()); + // Save function name for later type emission. + if (const Function *F = dyn_cast<Function>(GV)) + if (F->isDeclaration()) + COFFMMI.addExternalFunction(Symb->getName()); - if (Subtarget->isTargetELF()) { - O << "\t.type\t" << *CurrentFnSym << ",@function\n"; - } else if (Subtarget->isTargetCygMing()) { - O << "\t.def\t " << *CurrentFnSym; - O << ";\t.scl\t" << - (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) - << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) - << ";\t.endef\n"; } - O << *CurrentFnSym << ':'; - if (VerboseAsm) { - O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << ' '; - WriteAsOperand(O, F, /*PrintType=*/false, F->getParent()); - } - O << '\n'; - - // Add some workaround for linkonce linkage on Cygwin\MinGW - if (Subtarget->isTargetCygMing() && - (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) - O << "Lllvm$workaround$fake$stub$" << *CurrentFnSym << ":\n"; + return Symb; } -/// runOnMachineFunction - This uses the printMachineInstruction() -/// method to print assembly for each instruction. +/// runOnMachineFunction - Emit the function body. 
/// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { - const Function *F = MF.getFunction(); - this->MF = &MF; - CallingConv::ID CC = F->getCallingConv(); - SetupMachineFunction(MF); - O << "\n\n"; if (Subtarget->isTargetCOFF()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - - // Populate function information map. Don't want to populate - // non-stdcall or non-fastcall functions' information right now. - if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall) - COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>()); - } - - // Print out constants referenced by the function - EmitConstantPool(MF.getConstantPool()); - - // Print the 'header' of function - emitFunctionHeader(MF); - - // Emit pre-function debug and/or EH information. - if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) - DW->BeginFunction(&MF); - - // Print out code for the function. - bool hasAnyRealCode = false; - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - // Print a label for the basic block. - EmitBasicBlockStart(I); - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - // Print the assembly for the instruction. - if (!II->isLabel()) - hasAnyRealCode = true; - printMachineInstruction(II); - } - } - - if (Subtarget->isTargetDarwin() && !hasAnyRealCode) { - // If the function is empty, then we need to emit *something*. Otherwise, - // the function's label might be associated with something that it wasn't - // meant to be associated with. We emit a noop in this situation. - // We are assuming inline asms are code. - O << "\tnop\n"; + const Function *F = MF.getFunction(); + O << "\t.def\t " << *CurrentFnSym << ";\t.scl\t" << + (F->hasInternalLinkage() ? 
COFF::C_STAT : COFF::C_EXT) + << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) + << ";\t.endef\n"; } - if (MAI->hasDotTypeDotSizeDirective()) - O << "\t.size\t" << *CurrentFnSym << ", .-" << *CurrentFnSym << '\n'; + // Have common code print out the function header with linkage info etc. + EmitFunctionHeader(); - // Emit post-function debug information. - if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) - DW->EndFunction(&MF); - - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + // Emit the rest of the function body. + EmitFunctionBody(); // We didn't modify anything. return false; @@ -223,12 +122,6 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { else GVSym = GetGlobalValueSymbol(GV); - if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(GVSym, OutContext, GV, *TM.getTargetData()); - } - // Handle dllimport linkage. 
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName()); @@ -237,20 +130,20 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){ MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub"); - const MCSymbol *&StubSym = + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) StubSym = GetGlobalValueSymbol(GV); @@ -272,8 +165,8 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { TempNameStr += StringRef(MO.getSymbolName()); TempNameStr += StringRef("$stub"); - const MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); - const MCSymbol *&StubSym = + MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str()); + MCSymbol *&StubSym = MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym); if (StubSym == 0) { TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end()); @@ -338,7 +231,7 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { O << MO.getImm(); return; case MachineOperand::MO_MachineBasicBlock: - O << *GetMBBSymbol(MO.getMBB()->getNumber()); + O << *MO.getMBB()->getSymbol(OutContext); return; case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: @@ -451,30 +344,6 @@ void 
X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, printLeaMemReference(MI, Op, Modifier); } -void X86AsmPrinter::printPICJumpTableSetLabel(unsigned uid, - const MachineBasicBlock *MBB) const { - if (!MAI->getSetDirective()) - return; - - // We don't need .set machinery if we have GOT-style relocations - if (Subtarget->isPICStyleGOT()) - return; - - O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() - << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; - - O << *GetMBBSymbol(MBB->getNumber()); - - if (Subtarget->isPICStyleRIPRel()) - O << '-' << *GetJTISymbol(uid) << '\n'; - else { - O << '-'; - PrintPICBaseSymbol(); - O << '\n'; - } -} - - void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { PrintPICBaseSymbol(); O << '\n'; @@ -482,23 +351,6 @@ void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { O << ':'; } -void X86AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned uid) const { - const char *JTEntryDirective = MJTI->getEntrySize() == 4 ? - MAI->getData32bitsDirective() : MAI->getData64bitsDirective(); - - O << JTEntryDirective << ' '; - - if (Subtarget->isPICStyleRIPRel() || Subtarget->isPICStyleStubPIC()) { - O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << uid << "_set_" << MBB->getNumber(); - } else if (Subtarget->isPICStyleGOT()) - O << *GetMBBSymbol(MBB->getNumber()) << "@GOTOFF"; - else - O << *GetMBBSymbol(MBB->getNumber()); -} - bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) { unsigned Reg = MO.getReg(); switch (Mode) { @@ -625,24 +477,6 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } - -/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in -/// AT&T syntax to the current output stream. 
-/// -void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) { - ++EmittedInsts; - - processDebugLoc(MI, true); - - printInstructionThroughMCStreamer(MI); - - if (VerboseAsm) - EmitComments(*MI); - O << '\n'; - - processDebugLoc(MI, false); -} - void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. @@ -666,14 +500,17 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - O << *Stubs[i].first << ":\n"; - // Get the MCSymbol without the $stub suffix. - O << "\t.indirect_symbol " << *Stubs[i].second; - O << "\n\thlt ; hlt ; hlt ; hlt ; hlt\n"; + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .indirect_symbol _foo + OutStreamer.EmitSymbolAttribute(Stubs[i].second, MCSA_IndirectSymbol); + // hlt; hlt; hlt; hlt; hlt hlt = 0xf4 = -12. + const char HltInsts[] = { -12, -12, -12, -12, -12 }; + OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/); } - O << '\n'; Stubs.clear(); + OutStreamer.AddBlankLine(); } // Output stubs for external and common global variables. 
@@ -686,10 +523,15 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - O << *Stubs[i].first << ":\n\t.indirect_symbol " << *Stubs[i].second; - O << "\n\t.long\t0\n"; + // L_foo$non_lazy_ptr: + OutStreamer.EmitLabel(Stubs[i].first); + // .indirect_symbol _foo + OutStreamer.EmitSymbolAttribute(Stubs[i].second, MCSA_IndirectSymbol); + // .long 0 + OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); } Stubs.clear(); + OutStreamer.AddBlankLine(); } Stubs = MMIMacho.GetHiddenGVStubList(); @@ -698,10 +540,15 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { EmitAlignment(2); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - O << *Stubs[i].first << ":\n" << MAI->getData32bitsDirective(); - O << *Stubs[i].second << '\n'; + // L_foo$non_lazy_ptr: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second, + OutContext), + 4/*size*/, 0/*addrspace*/); } Stubs.clear(); + OutStreamer.AddBlankLine(); } // Funny Darwin hack: This flag tells the linker that no global symbols @@ -735,7 +582,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasDLLExportLinkage()) { MCSymbol *Sym = GetGlobalValueSymbol(I); - COFFMMI.DecorateCygMingName(Sym, OutContext, I, *TM.getTargetData()); DLLExportedFns.push_back(Sym); } @@ -757,6 +603,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } } + + if (Subtarget->isTargetELF()) { + TargetLoweringObjectFileELF &TLOFELF = + static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. 
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const TargetData *TD = TM.getTargetData(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) + O << *Stubs[i].first << ":\n" + << (TD->getPointerSize() == 8 ? + MAI->getData64bitsDirective() : MAI->getData32bitsDirective()) + << *Stubs[i].second << '\n'; + + Stubs.clear(); + } + } } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index 6a9262d..039214a 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -36,8 +36,9 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; public: explicit X86AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, - const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V) { + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T) + : AsmPrinter(O, TM, Ctx, Streamer, T) { Subtarget = &TM.getSubtarget<X86Subtarget>(); } @@ -57,14 +58,10 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { virtual void EmitEndOfAsmFile(Module &M); - void printInstructionThroughMCStreamer(const MachineInstr *MI); - - - void printMCInst(const MCInst *MI); - - void printSymbolOperand(const MachineOperand &MO); - + virtual void EmitInstruction(const MachineInstr *MI); + void printSymbolOperand(const MachineOperand &MO); + virtual MCSymbol *GetGlobalValueSymbol(const GlobalValue *GV) const; // These methods are used by the tablegen'erated instruction printer. 
void printOperand(const MachineInstr *MI, unsigned OpNo, @@ -124,24 +121,12 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { const char *Modifier=NULL); void printLeaMemReference(const MachineInstr *MI, unsigned Op, const char *Modifier=NULL); - void printPICJumpTableSetLabel(unsigned uid, - const MachineBasicBlock *MBB) const; - void printPICJumpTableSetLabel(unsigned uid, unsigned uid2, - const MachineBasicBlock *MBB) const { - AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB); - } - void printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, - const MachineBasicBlock *MBB, - unsigned uid) const; void printPICLabel(const MachineInstr *MI, unsigned Op); void PrintPICBaseSymbol() const; bool runOnMachineFunction(MachineFunction &F); - - void emitFunctionHeader(const MachineFunction &MF); - }; } // end namespace llvm diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index 4efb529..610beb5 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -24,11 +24,14 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. 
#define MachineInstr MCInst -#define NO_ASM_WRITER_BOILERPLATE +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter1.inc" #undef MachineInstr void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index 1976177..545bf84 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -26,10 +26,12 @@ public: : MCInstPrinter(O, MAI) {} virtual void printInst(const MCInst *MI); + virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index b970d46..fa8d13d 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -14,8 +14,9 @@ #include "X86MCInstLower.h" #include "X86AsmPrinter.h" -#include "X86MCAsmInfo.h" #include "X86COFFMachineModuleInfo.h" +#include "X86MCAsmInfo.h" +#include "X86MCTargetExpr.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" @@ -25,6 +26,7 @@ #include "llvm/Target/Mangler.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Type.h" using namespace llvm; @@ -39,37 +41,45 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { - return 
Ctx.GetOrCreateSymbol(Twine(AsmPrinter.MAI->getPrivateGlobalPrefix())+ - Twine(AsmPrinter.getFunctionNumber())+"$pb"); + const TargetLowering *TLI = AsmPrinter.TM.getTargetLowering(); + return static_cast<const X86TargetLowering*>(TLI)-> + getPICBaseSymbol(AsmPrinter.MF, Ctx); } -/// LowerGlobalAddressOperand - Lower an MO_GlobalAddress operand to an -/// MCOperand. +/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol +/// operand to an MCSymbol. MCSymbol *X86MCInstLower:: -GetGlobalAddressSymbol(const MachineOperand &MO) const { - const GlobalValue *GV = MO.getGlobal(); - - bool isImplicitlyPrivate = false; - if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || - MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || - MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || - MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) - isImplicitlyPrivate = true; - +GetSymbolFromOperand(const MachineOperand &MO) const { + assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference"); + SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); - if (getSubtarget().isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = - AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData()); - } + if (MO.isGlobal()) { + bool isImplicitlyPrivate = false; + if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || + MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) + isImplicitlyPrivate = true; + + const GlobalValue *GV = MO.getGlobal(); + Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); + if (getSubtarget().isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData()); + } + } else { + 
assert(MO.isSymbol()); + Name += AsmPrinter.MAI->getGlobalPrefix(); + Name += MO.getSymbolName(); + } + + // If the target flags on the operand changes the name of the symbol, do that + // before we return the symbol. switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name. - break; + default: break; case X86II::MO_DLLIMPORT: { // Handle dllimport linkage. const char *Prefix = "__imp_"; @@ -81,190 +91,72 @@ GetGlobalAddressSymbol(const MachineOperand &MO) const { Name += "$non_lazy_ptr"; MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym); - if (StubSym == 0) - StubSym = AsmPrinter.GetGlobalValueSymbol(GV); + MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym); + if (StubSym == 0) { + assert(MO.isGlobal() && "Extern symbol not handled yet"); + StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal()); + } return Sym; } case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: { Name += "$non_lazy_ptr"; MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); - if (StubSym == 0) - StubSym = AsmPrinter.GetGlobalValueSymbol(GV); + MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); + if (StubSym == 0) { + assert(MO.isGlobal() && "Extern symbol not handled yet"); + StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal()); + } return Sym; } case X86II::MO_DARWIN_STUB: { Name += "$stub"; MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); - if (StubSym == 0) - StubSym = AsmPrinter.GetGlobalValueSymbol(GV); - return Sym; - } - // FIXME: These probably should be a modifier on the symbol or something?? 
- case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; - } - - return Ctx.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *X86MCInstLower:: -GetExternalSymbolSymbol(const MachineOperand &MO) const { - SmallString<128> Name; - Name += AsmPrinter.MAI->getGlobalPrefix(); - Name += MO.getSymbolName(); - - switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_GOT_ABSOLUTE_ADDRESS: // Doesn't modify symbol name. - case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name. - break; - case X86II::MO_DLLIMPORT: { - // Handle dllimport linkage. - const char *Prefix = "__imp_"; - Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix)); - break; - } - case X86II::MO_DARWIN_STUB: { - Name += "$stub"; - MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); - - if (StubSym == 0) { + MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); + if (StubSym) + return Sym; + + if (MO.isGlobal()) { + StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal()); + } else { Name.erase(Name.end()-5, Name.end()); StubSym = Ctx.GetOrCreateSymbol(Name.str()); } return Sym; } - // FIXME: These probably should be a modifier on the symbol or something?? 
- case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; - } - - return Ctx.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *X86MCInstLower::GetJumpTableSymbol(const MachineOperand &MO) const { - SmallString<256> Name; - // FIXME: Use AsmPrinter.GetJTISymbol. @TLSGD shouldn't be part of the symbol - // name! - raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "JTI" - << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex(); - - switch (MO.getTargetFlags()) { - default: - llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_PIC_BASE_OFFSET: - case X86II::MO_DARWIN_NONLAZY_PIC_BASE: - case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - break; - // FIXME: These probably should be a modifier on the symbol or something?? - case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; } - - // Create a symbol for the name. - return Ctx.GetOrCreateSymbol(Name.str()); -} - -MCSymbol *X86MCInstLower:: -GetConstantPoolIndexSymbol(const MachineOperand &MO) const { - SmallString<256> Name; - // FIXME: USe AsmPrinter.GetCPISymbol. @TLSGD shouldn't be part of the symbol - // name! 
- raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "CPI" - << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex(); - - switch (MO.getTargetFlags()) { - default: - llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. - case X86II::MO_PIC_BASE_OFFSET: - case X86II::MO_DARWIN_NONLAZY_PIC_BASE: - case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: - break; - // FIXME: These probably should be a modifier on the symbol or something?? - case X86II::MO_TLSGD: Name += "@TLSGD"; break; - case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; - case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; - case X86II::MO_TPOFF: Name += "@TPOFF"; break; - case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; - case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; - case X86II::MO_GOT: Name += "@GOT"; break; - case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; - case X86II::MO_PLT: Name += "@PLT"; break; - } - - // Create a symbol for the name. return Ctx.GetOrCreateSymbol(Name.str()); } -MCSymbol *X86MCInstLower:: -GetBlockAddressSymbol(const MachineOperand &MO) const { - const char *Suffix = ""; - switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on BA operand"); - case X86II::MO_NO_FLAG: break; // No flag. - case X86II::MO_PIC_BASE_OFFSET: break; // Doesn't modify symbol name. - case X86II::MO_GOTOFF: Suffix = "@GOTOFF"; break; - } - - return AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress(), Suffix); -} - MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + const MCExpr *Expr = 0; + X86MCTargetExpr::VariantKind RefKind = X86MCTargetExpr::Invalid; switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); case X86II::MO_NO_FLAG: // No flag. 
- // These affect the name of the symbol, not any suffix. case X86II::MO_DARWIN_NONLAZY: case X86II::MO_DLLIMPORT: case X86II::MO_DARWIN_STUB: - case X86II::MO_TLSGD: - case X86II::MO_GOTTPOFF: - case X86II::MO_INDNTPOFF: - case X86II::MO_TPOFF: - case X86II::MO_NTPOFF: - case X86II::MO_GOTPCREL: - case X86II::MO_GOT: - case X86II::MO_GOTOFF: - case X86II::MO_PLT: break; + + case X86II::MO_TLSGD: RefKind = X86MCTargetExpr::TLSGD; break; + case X86II::MO_GOTTPOFF: RefKind = X86MCTargetExpr::GOTTPOFF; break; + case X86II::MO_INDNTPOFF: RefKind = X86MCTargetExpr::INDNTPOFF; break; + case X86II::MO_TPOFF: RefKind = X86MCTargetExpr::TPOFF; break; + case X86II::MO_NTPOFF: RefKind = X86MCTargetExpr::NTPOFF; break; + case X86II::MO_GOTPCREL: RefKind = X86MCTargetExpr::GOTPCREL; break; + case X86II::MO_GOT: RefKind = X86MCTargetExpr::GOT; break; + case X86II::MO_GOTOFF: RefKind = X86MCTargetExpr::GOTOFF; break; + case X86II::MO_PLT: RefKind = X86MCTargetExpr::PLT; break; case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + Expr = MCSymbolRefExpr::Create(Sym, Ctx); // Subtract the pic base. Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), @@ -272,6 +164,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, break; } + if (Expr == 0) { + if (RefKind == X86MCTargetExpr::Invalid) + Expr = MCSymbolRefExpr::Create(Sym, Ctx); + else + Expr = X86MCTargetExpr::Create(Sym, RefKind, Ctx); + } + if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), @@ -300,6 +199,17 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { } } +/// LowerSubReg32_Op0 - Things like MOVZX16rr8 -> MOVZX32rr8. 
+static void LowerSubReg32_Op0(MCInst &OutMI, unsigned NewOpc) { + OutMI.setOpcode(NewOpc); + lower_subreg32(&OutMI, 0); +} +/// LowerUnaryToTwoAddr - R = setb -> R = sbb R, R +static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) { + OutMI.setOpcode(NewOpc); + OutMI.addOperand(OutMI.getOperand(0)); + OutMI.addOperand(OutMI.getOperand(0)); +} void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { @@ -323,22 +233,23 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( - AsmPrinter.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + MO.getMBB()->getSymbol(Ctx), Ctx)); break; case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); break; case MachineOperand::MO_ExternalSymbol: - MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO)); break; case MachineOperand::MO_JumpTableIndex: - MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO)); + MCOp = LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex())); break; case MachineOperand::MO_ConstantPoolIndex: - MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); + MCOp = LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex())); break; case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO)); + MCOp = LowerSymbolOperand(MO, + AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress())); break; } @@ -350,72 +261,48 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand. 
lower_lea64_32mem(&OutMI, 1); break; - case X86::MOVZX16rr8: - OutMI.setOpcode(X86::MOVZX32rr8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX16rm8: - OutMI.setOpcode(X86::MOVZX32rm8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVSX16rr8: - OutMI.setOpcode(X86::MOVSX32rr8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVSX16rm8: - OutMI.setOpcode(X86::MOVSX32rm8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rr32: - OutMI.setOpcode(X86::MOV32rr); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rm32: - OutMI.setOpcode(X86::MOV32rm); - lower_subreg32(&OutMI, 0); - break; - case X86::MOV64ri64i32: - OutMI.setOpcode(X86::MOV32ri); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rr8: - OutMI.setOpcode(X86::MOVZX32rr8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rm8: - OutMI.setOpcode(X86::MOVZX32rm8); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rr16: - OutMI.setOpcode(X86::MOVZX32rr16); - lower_subreg32(&OutMI, 0); - break; - case X86::MOVZX64rm16: - OutMI.setOpcode(X86::MOVZX32rm16); - lower_subreg32(&OutMI, 0); - break; + case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break; + case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; + case X86::MOVSX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rr8); break; + case X86::MOVSX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVSX32rm8); break; + case X86::MOVZX64rr32: LowerSubReg32_Op0(OutMI, X86::MOV32rr); break; + case X86::MOVZX64rm32: LowerSubReg32_Op0(OutMI, X86::MOV32rm); break; + case X86::MOV64ri64i32: LowerSubReg32_Op0(OutMI, X86::MOV32ri); break; + case X86::MOVZX64rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break; + case X86::MOVZX64rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; + case X86::MOVZX64rr16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr16); break; + case X86::MOVZX64rm16: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm16); break; + case X86::SETB_C8r: LowerUnaryToTwoAddr(OutMI, X86::SBB8rr); break; + 
case X86::SETB_C16r: LowerUnaryToTwoAddr(OutMI, X86::SBB16rr); break; + case X86::SETB_C32r: LowerUnaryToTwoAddr(OutMI, X86::SBB32rr); break; + case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break; + case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break; + case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break; + case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break; + case X86::MMX_V_SETALLONES: + LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break; + case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break; + case X86::V_SET0: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break; + case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break; + case X86::MOV16r0: - OutMI.setOpcode(X86::MOV32r0); - lower_subreg32(&OutMI, 0); + LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 + LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; case X86::MOV64r0: - OutMI.setOpcode(X86::MOV32r0); - lower_subreg32(&OutMI, 0); + LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0 + LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; } } -void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { +void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(OutContext, Mang, *this); switch (MI->getOpcode()) { - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case TargetInstrInfo::GC_LABEL: - printLabel(MI); - return; - case TargetInstrInfo::DEBUG_VALUE: { + case TargetOpcode::DBG_VALUE: { // FIXME: if this is implemented for another target before it goes // away completely, the common part should be moved into AsmPrinter. 
if (!VerboseAsm) @@ -427,9 +314,35 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { O << V.getName(); O << " <- "; if (NOps==3) { - // Variable is in register - assert(MI->getOperand(0).getType()==MachineOperand::MO_Register); - printOperand(MI, 0); + // Register or immediate value. Register 0 means undef. + assert(MI->getOperand(0).getType()==MachineOperand::MO_Register || + MI->getOperand(0).getType()==MachineOperand::MO_Immediate || + MI->getOperand(0).getType()==MachineOperand::MO_FPImmediate); + if (MI->getOperand(0).getType()==MachineOperand::MO_Register && + MI->getOperand(0).getReg()==0) { + // Suppress offset in this case, it is not meaningful. + O << "undef"; + OutStreamer.AddBlankLine(); + return; + } else if (MI->getOperand(0).getType()==MachineOperand::MO_FPImmediate) { + // This is more naturally done in printOperand, but since the only use + // of such an operand is in this comment and that is temporary (and it's + // ugly), we prefer to keep this localized. + // The include of Type.h may be removable when this code is. + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy() || + MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) + MI->getOperand(0).print(O, &TM); + else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + O << "(long double) " << APF.convertToDouble(); + } + } else + printOperand(MI, 0); } else { // Frame address. Currently handles register +- offset only. 
assert(MI->getOperand(0).getType()==MachineOperand::MO_Register); @@ -438,17 +351,9 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { } O << "+"; printOperand(MI, NOps-2); + OutStreamer.AddBlankLine(); return; } - case TargetInstrInfo::INLINEASM: - printInlineAsm(MI); - return; - case TargetInstrInfo::IMPLICIT_DEF: - printImplicitDef(MI); - return; - case TargetInstrInfo::KILL: - printKill(MI); - return; case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which @@ -464,8 +369,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // lot of extra uniquing. TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); - printMCInst(&TmpInst); - O << '\n'; + OutStreamer.EmitInstruction(TmpInst); // Emit the label. OutStreamer.EmitLabel(PICBase); @@ -473,7 +377,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // popl $reg TmpInst.setOpcode(X86::POP32r); TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg()); - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); return; } @@ -495,7 +399,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { OutStreamer.EmitLabel(DotSym); // Now that we have emitted the label, lower the complex operand expression. 
- MCSymbol *OpSym = MCInstLowering.GetExternalSymbolSymbol(MI->getOperand(2)); + MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); const MCExpr *PICBase = @@ -510,7 +414,7 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); TmpInst.addOperand(MCOperand::CreateExpr(DotExpr)); - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); return; } } @@ -518,7 +422,6 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - - printMCInst(&TmpInst); + OutStreamer.EmitInstruction(TmpInst); } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h index 94f8bfc..ebd23f6 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h @@ -39,11 +39,7 @@ public: MCSymbol *GetPICBaseSymbol() const; - MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; - MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; - MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; - MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; - MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; private: diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 4186fec..61f26a7 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -25,6 +25,8 @@ set(sources X86InstrInfo.cpp X86JITInfo.cpp X86MCAsmInfo.cpp + X86MCCodeEmitter.cpp + X86MCTargetExpr.cpp X86RegisterInfo.cpp X86Subtarget.cpp X86TargetMachine.cpp diff --git 
a/lib/Target/X86/Disassembler/Makefile b/lib/Target/X86/Disassembler/Makefile index 6c26853..b289647 100644 --- a/lib/Target/X86/Disassembler/Makefile +++ b/lib/Target/X86/Disassembler/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMX86Disassembler -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' x86 target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 5e625dc..f4ff894 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -10,7 +10,6 @@ LEVEL = ../../.. LIBRARYNAME = LLVMX86CodeGen TARGET = X86 -CXXFLAGS = -fno-rtti # Make sure that tblgen is run, first thing. BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ @@ -19,6 +18,7 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ X86GenAsmWriter1.inc X86GenDAGISel.inc \ X86GenDisassemblerTables.inc X86GenFastISel.inc \ X86GenCallingConv.inc X86GenSubtarget.inc \ + X86GenEDInfo.inc DIRS = AsmPrinter AsmParser Disassembler TargetInfo diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 0f3e44b..19eb05e 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -376,7 +376,7 @@ ret ... saving two instructions. The basic idea is that a reload from a spill slot, can, if only one 4-byte -chunk is used, bring in 3 zeros the the one element instead of 4 elements. +chunk is used, bring in 3 zeros the one element instead of 4 elements. This can be used to simplify a variety of shuffle operations, where the elements are fixed zeros. @@ -936,3 +936,54 @@ Also, the 'ret's should be shared. This is PR6032. //===---------------------------------------------------------------------===// +These should compile into the same code (PR6214): Perhaps instcombine should +canonicalize the former into the latter? 
+ +define float @foo(float %x) nounwind { + %t = bitcast float %x to i32 + %s = and i32 %t, 2147483647 + %d = bitcast i32 %s to float + ret float %d +} + +declare float @fabsf(float %n) +define float @bar(float %x) nounwind { + %d = call float @fabsf(float %x) + ret float %d +} + +//===---------------------------------------------------------------------===// + +This IR (from PR6194): + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%0 = type { double, double } +%struct.float3 = type { float, float, float } + +define void @test(%0, %struct.float3* nocapture %res) nounwind noinline ssp { +entry: + %tmp18 = extractvalue %0 %0, 0 ; <double> [#uses=1] + %tmp19 = bitcast double %tmp18 to i64 ; <i64> [#uses=1] + %tmp20 = zext i64 %tmp19 to i128 ; <i128> [#uses=1] + %tmp10 = lshr i128 %tmp20, 32 ; <i128> [#uses=1] + %tmp11 = trunc i128 %tmp10 to i32 ; <i32> [#uses=1] + %tmp12 = bitcast i32 %tmp11 to float ; <float> [#uses=1] + %tmp5 = getelementptr inbounds %struct.float3* %res, i64 0, i32 1 ; <float*> [#uses=1] + store float %tmp12, float* %tmp5 + ret void +} + +Compiles to: + +_test: ## @test + movd %xmm0, %rax + shrq $32, %rax + movl %eax, 4(%rdi) + ret + +This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and +doing a shuffle from v[1] to v[0] then a float store. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README-UNIMPLEMENTED.txt b/lib/Target/X86/README-UNIMPLEMENTED.txt index 69dc8ee..c26c75a 100644 --- a/lib/Target/X86/README-UNIMPLEMENTED.txt +++ b/lib/Target/X86/README-UNIMPLEMENTED.txt @@ -11,4 +11,4 @@ which would be great. 
2) vector comparisons 3) vector fp<->int conversions: PR2683, PR2684, PR2685, PR2686, PR2688 4) bitcasts from vectors to scalars: PR2804 - +5) llvm.atomic.cmp.swap.i128.p0i128: PR3462 diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index aa7bb3d..3c6138b 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1868,3 +1868,69 @@ carried over to machine instructions. Asm printer (or JIT) can use this information to add the "lock" prefix. //===---------------------------------------------------------------------===// + +_Bool bar(int *x) { return *x & 1; } + +define zeroext i1 @bar(i32* nocapture %x) nounwind readonly { +entry: + %tmp1 = load i32* %x ; <i32> [#uses=1] + %and = and i32 %tmp1, 1 ; <i32> [#uses=1] + %tobool = icmp ne i32 %and, 0 ; <i1> [#uses=1] + ret i1 %tobool +} + +bar: # @bar +# BB#0: # %entry + movl 4(%esp), %eax + movb (%eax), %al + andb $1, %al + movzbl %al, %eax + ret + +Missed optimization: should be movl+andl. + +//===---------------------------------------------------------------------===// + +Consider the following two functions compiled with clang: +_Bool foo(int *x) { return !(*x & 4); } +unsigned bar(int *x) { return !(*x & 4); } + +foo: + movl 4(%esp), %eax + testb $4, (%eax) + sete %al + movzbl %al, %eax + ret + +bar: + movl 4(%esp), %eax + movl (%eax), %eax + shrl $2, %eax + andl $1, %eax + xorl $1, %eax + ret + +The second function generates more code even though the two functions are +functionally identical. + +//===---------------------------------------------------------------------===// + +Take the following C code: +int x(int y) { return (y & 63) << 14; } + +Code produced by gcc: + andl $63, %edi + sall $14, %edi + movl %edi, %eax + ret + +Code produced by clang: + shll $14, %edi + movl %edi, %eax + andl $1032192, %eax + ret + +The code produced by gcc is 3 bytes shorter. This sort of construct often +shows up with bitfields. 
+ +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/TargetInfo/Makefile b/lib/Target/X86/TargetInfo/Makefile index 211607f..9858e6a 100644 --- a/lib/Target/X86/TargetInfo/Makefile +++ b/lib/Target/X86/TargetInfo/Makefile @@ -9,7 +9,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMX86Info -CXXFLAGS = -fno-rtti # Hack: we need to include 'main' target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 684c61f..1a1e447 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -23,6 +23,7 @@ class X86TargetMachine; class FunctionPass; class MachineCodeEmitter; class MCCodeEmitter; +class MCContext; class JITCodeEmitter; class Target; class formatted_raw_ostream; @@ -46,15 +47,13 @@ FunctionPass *createX87FPRegKillInserterPass(); /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified MCE object. 
- -FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM, - MachineCodeEmitter &MCE); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); -FunctionPass *createX86ObjectCodeEmitterPass(X86TargetMachine &TM, - ObjectCodeEmitter &OCE); -MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM); +MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); +MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index ea52795..ab67acb 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -27,90 +27,55 @@ X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) { X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { } -void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F, - const X86MachineFunctionInfo &Val) { - FunctionInfoMap[F] = Val; +void X86COFFMachineModuleInfo::addExternalFunction(const StringRef& Name) { + CygMingStubs.insert(Name); } - - -static X86MachineFunctionInfo calculateFunctionInfo(const Function *F, - const TargetData &TD) { - X86MachineFunctionInfo Info; - uint64_t Size = 0; - - switch (F->getCallingConv()) { - case CallingConv::X86_StdCall: - Info.setDecorationStyle(StdCall); - break; - case CallingConv::X86_FastCall: - Info.setDecorationStyle(FastCall); - break; - default: - return Info; - } - - unsigned argNum = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI, ++argNum) { - const Type* Ty = AI->getType(); - - // 'Dereference' type in case of byval parameter attribute - if (F->paramHasAttr(argNum, Attribute::ByVal)) - Ty = 
cast<PointerType>(Ty)->getElementType(); - - // Size should be aligned to DWORD boundary - Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; - } - - // We're not supporting tooooo huge arguments :) - Info.setBytesToPopOnReturn((unsigned int)Size); - return Info; -} - - -/// DecorateCygMingName - Query FunctionInfoMap and use this information for -/// various name decorations for Cygwin and MingW. +/// DecorateCygMingName - Apply various name decorations if the function uses +/// stdcall or fastcall calling convention. void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD) { const Function *F = dyn_cast<Function>(GV); if (!F) return; - - // Save function name for later type emission. - if (F->isDeclaration()) - CygMingStubs.insert(StringRef(Name.data(), Name.size())); - + // We don't want to decorate non-stdcall or non-fastcall functions right now CallingConv::ID CC = F->getCallingConv(); if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall) return; - - const X86MachineFunctionInfo *Info; - - FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F); - if (info_item == FunctionInfoMap.end()) { - // Calculate apropriate function info and populate map - FunctionInfoMap[F] = calculateFunctionInfo(F, TD); - Info = &FunctionInfoMap[F]; - } else { - Info = &info_item->second; - } - - if (Info->getDecorationStyle() == None) return; + + unsigned ArgWords = 0; + DenseMap<const Function*, unsigned>::const_iterator item = FnArgWords.find(F); + if (item == FnArgWords.end()) { + // Calculate arguments sizes + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + const Type* Ty = AI->getType(); + + // 'Dereference' type in case of byval parameter attribute + if (AI->hasByValAttr()) + Ty = cast<PointerType>(Ty)->getElementType(); + + // Size should be aligned to DWORD boundary + ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; + } + + FnArgWords[F] = 
ArgWords; + } else + ArgWords = item->second; + const FunctionType *FT = F->getFunctionType(); - // "Pure" variadic functions do not receive @0 suffix. if (!FT->isVarArg() || FT->getNumParams() == 0 || (FT->getNumParams() == 1 && F->hasStructRetAttr())) - raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn(); - - if (Info->getDecorationStyle() == FastCall) { + raw_svector_ostream(Name) << '@' << ArgWords; + + if (CC == CallingConv::X86_FastCall) { if (Name[0] == '_') Name[0] = '@'; else Name.insert(Name.begin(), '@'); - } + } } /// DecorateCygMingName - Query FunctionInfoMap and use this information for @@ -121,6 +86,6 @@ void X86COFFMachineModuleInfo::DecorateCygMingName(MCSymbol *&Name, const TargetData &TD) { SmallString<128> NameStr(Name->getName().begin(), Name->getName().end()); DecorateCygMingName(NameStr, GV, TD); - + Name = Ctx.GetOrCreateSymbol(NameStr.str()); } diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 0e2009e..9de3dcd 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -21,44 +21,25 @@ namespace llvm { class X86MachineFunctionInfo; class TargetData; - + /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation /// for X86 COFF targets. class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { StringSet<> CygMingStubs; - - // We have to propagate some information about MachineFunction to - // AsmPrinter. It's ok, when we're printing the function, since we have - // access to MachineFunction and can get the appropriate MachineFunctionInfo. - // Unfortunately, this is not possible when we're printing reference to - // Function (e.g. calling it and so on). Even more, there is no way to get the - // corresponding MachineFunctions: it can even be not created at all. That's - // why we should use additional structure, when we're collecting all necessary - // information. 
- // - // This structure is using e.g. for name decoration for stdcall & fastcall'ed - // function, since we have to use arguments' size for decoration. - typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap; - FMFInfoMap FunctionInfoMap; - + DenseMap<const Function*, unsigned> FnArgWords; public: X86COFFMachineModuleInfo(const MachineModuleInfo &); ~X86COFFMachineModuleInfo(); - - + void DecorateCygMingName(MCSymbol* &Name, MCContext &Ctx, const GlobalValue *GV, const TargetData &TD); void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD); - - void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val); - + void addExternalFunction(const StringRef& Name); typedef StringSet<>::const_iterator stub_iterator; stub_iterator stub_begin() const { return CygMingStubs.begin(); } stub_iterator stub_end() const { return CygMingStubs.end(); } - - }; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 828e872..8deadf6 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -21,9 +21,7 @@ #include "X86.h" #include "llvm/LLVMContext.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -110,19 +108,10 @@ template<class CodeEmitter> /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code /// to the specified templated MachineCodeEmitter object. 
- -FunctionPass *llvm::createX86CodeEmitterPass(X86TargetMachine &TM, - MachineCodeEmitter &MCE) { - return new Emitter<MachineCodeEmitter>(TM, MCE); -} FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE) { return new Emitter<JITCodeEmitter>(TM, JCE); } -FunctionPass *llvm::createX86ObjectCodeEmitterPass(X86TargetMachine &TM, - ObjectCodeEmitter &OCE) { - return new Emitter<ObjectCodeEmitter>(TM, OCE); -} template<class CodeEmitter> bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { @@ -202,8 +191,15 @@ template<class CodeEmitter> void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES, unsigned Reloc) { intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0; + + // X86 never needs stubs because instruction selection will always pick + // an instruction sequence that is large enough to hold any address + // to a symbol. + // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall) + bool NeedStub = false; MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), - Reloc, ES, RelocCST)); + Reloc, ES, RelocCST, + 0, NeedStub)); if (Reloc == X86::reloc_absolute_dword) MCE.emitDWordLE(0); else @@ -253,7 +249,7 @@ void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTI, unsigned Reloc, template<class CodeEmitter> unsigned Emitter<CodeEmitter>::getX86RegNum(unsigned RegNo) const { - return II->getRegisterInfo().getX86RegNum(RegNo); + return X86RegisterInfo::getX86RegNum(RegNo); } inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, @@ -391,86 +387,103 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. 
- if ((!Is64BitMode || DispForReloc || BaseReg != 0) && + unsigned BaseRegNo = -1U; + if (BaseReg != 0 && BaseReg != X86::RIP) + BaseRegNo = getX86RegNum(BaseReg); + + if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && - ((BaseReg == 0 && MCE.earlyResolveAddresses()) || BaseReg == X86::RIP || - (BaseReg != 0 && getX86RegNum(BaseReg) != N86::ESP))) { - if (BaseReg == 0 || BaseReg == X86::RIP) { // Just a displacement? - // Emit special case [disp32] encoding + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && + // If there is no base register and we're in 64-bit mode, we need a SIB + // byte to emit an addr that is just 'disp32' (the non-RIP relative form). + (!Is64BitMode || BaseReg != 0)) { + if (BaseReg == 0 || // [disp32] in X86-32 mode + BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); emitDisplacementField(DispForReloc, DispVal, PCAdj, true); - } else { - unsigned BaseRegNo = getX86RegNum(BaseReg); - if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { - // Emit simple indirect register encoding... [EAX] f.e. - MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); - } else if (!DispForReloc && isDisp8(DispVal)) { - // Emit the disp8 encoding... 
[REG+disp8] - MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); - emitConstant(DispVal, 1); - } else { - // Emit the most general non-SIB encoding: [REG+disp32] - MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); - emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); - } + return; } - - } else { // We need a SIB byte, so start by outputting the ModR/M byte first - assert(IndexReg.getReg() != X86::ESP && - IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); - - bool ForceDisp32 = false; - bool ForceDisp8 = false; - if (BaseReg == 0) { - // If there is no base register, we emit the special case SIB byte with - // MOD=0, BASE=5, to JUST get the index, scale, and displacement. - MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); - ForceDisp32 = true; - } else if (DispForReloc) { - // Emit the normal disp32 encoding. - MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); - ForceDisp32 = true; - } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { - // Emit no displacement ModR/M byte - MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); - } else if (isDisp8(DispVal)) { - // Emit the disp8 encoding... - MCE.emitByte(ModRMByte(1, RegOpcodeField, 4)); - ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP - } else { - // Emit the normal disp32 encoding... - MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); - } - - // Calculate what the SS field value should be... - static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; - unsigned SS = SSTable[Scale.getImm()]; - - if (BaseReg == 0) { - // Handle the SIB byte for the case where there is no base, see Intel - // Manual 2A, table 2-7. The displacement has already been output. 
- unsigned IndexRegNo; - if (IndexReg.getReg()) - IndexRegNo = getX86RegNum(IndexReg.getReg()); - else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) - IndexRegNo = 4; - emitSIBByte(SS, IndexRegNo, 5); - } else { - unsigned BaseRegNo = getX86RegNum(BaseReg); - unsigned IndexRegNo; - if (IndexReg.getReg()) - IndexRegNo = getX86RegNum(IndexReg.getReg()); - else - IndexRegNo = 4; // For example [ESP+1*<noreg>+4] - emitSIBByte(SS, IndexRegNo, BaseRegNo); + + // If the base is not EBP/ESP and there is no displacement, use simple + // indirect register encoding, this handles addresses like [EAX]. The + // encoding for [EBP] with no displacement means [disp32] so we handle it + // by emitting a displacement of 0 below. + if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { + MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); + return; } - - // Do we need to output a displacement? - if (ForceDisp8) { + + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. + if (!DispForReloc && isDisp8(DispVal)) { + MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); emitConstant(DispVal, 1); - } else if (DispVal != 0 || ForceDisp32) { - emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); + return; } + + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] + MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); + return; + } + + // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first. + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + bool ForceDisp8 = false; + if (BaseReg == 0) { + // If there is no base register, we emit the special case SIB byte with + // MOD=0, BASE=4, to JUST get the index, scale, and displacement. 
+ MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispForReloc) { + // Emit the normal disp32 encoding. + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { + // Emit no displacement ModR/M byte + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + } else if (isDisp8(DispVal)) { + // Emit the disp8 encoding... + MCE.emitByte(ModRMByte(1, RegOpcodeField, 4)); + ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + } else { + // Emit the normal disp32 encoding... + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + } + + // Calculate what the SS field value should be... + static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + unsigned SS = SSTable[Scale.getImm()]; + + if (BaseReg == 0) { + // Handle the SIB byte for the case where there is no base, see Intel + // Manual 2A, table 2-7. The displacement has already been output. + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) + IndexRegNo = 4; + emitSIBByte(SS, IndexRegNo, 5); + } else { + unsigned BaseRegNo = getX86RegNum(BaseReg); + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else + IndexRegNo = 4; // For example [ESP+1*<noreg>+4] + emitSIBByte(SS, IndexRegNo, BaseRegNo); + } + + // Do we need to output a displacement? + if (ForceDisp8) { + emitConstant(DispVal, 1); + } else if (DispVal != 0 || ForceDisp32) { + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); } } @@ -570,7 +583,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, // Skip the last source operand that is tied_to the dest reg. e.g. 
LXADD32 --NumOps; - unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc); + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags); switch (Desc->TSFlags & X86II::FormMask) { default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); @@ -582,25 +595,25 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, llvm_unreachable("psuedo instructions should be removed before code" " emission"); break; - case TargetInstrInfo::INLINEASM: + case TargetOpcode::INLINEASM: // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) llvm_report_error("JIT does not support inline asm!"); break; - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - case TargetInstrInfo::GC_LABEL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: MCE.emitLabel(MI.getOperand(0).getImm()); break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: case X86::FP_REG_KILL: break; case X86::MOVPC32r: { // This emits the "call" portion of this pseudo instruction. MCE.emitByte(BaseOpcode); - emitConstant(0, X86InstrInfo::sizeOfImm(Desc)); + emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags)); // Remember PIC base. PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset(); X86JITInfo *JTI = TM.getJITInfo(); @@ -639,15 +652,21 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); break; } + + // FIXME: Only used by hackish MCCodeEmitter, remove when dead. + if (MO.isJTI()) { + emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word); + break; + } assert(MO.isImm() && "Unknown RawFrm operand!"); if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { // Fix up immediate operand for pc relative calls. 
intptr_t Imm = (intptr_t)MO.getImm(); Imm = Imm - MCE.getCurrentPCValue() - 4; - emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc)); + emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags)); } else - emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc)); + emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -658,7 +677,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, break; const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) { emitConstant(MO1.getImm(), Size); break; @@ -691,7 +710,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; } case X86II::MRMDestMem: { @@ -702,7 +721,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += X86AddrNumOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -713,7 +732,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; case X86II::MRMSrcMem: { @@ -726,7 +745,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, AddrOperands = X86AddrNumOperands; intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? 
- X86InstrInfo::sizeOfImm(Desc) : 0; + X86II::getSizeOfImm(Desc->TSFlags) : 0; MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp+1, getX86RegNum(MI.getOperand(CurOp).getReg()), @@ -734,7 +753,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, CurOp += AddrOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), - X86InstrInfo::sizeOfImm(Desc)); + X86II::getSizeOfImm(Desc->TSFlags)); break; } @@ -743,33 +762,14 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: { MCE.emitByte(BaseOpcode); - - // Special handling of lfence, mfence, monitor, and mwait. - if (Desc->getOpcode() == X86::LFENCE || - Desc->getOpcode() == X86::MFENCE || - Desc->getOpcode() == X86::MONITOR || - Desc->getOpcode() == X86::MWAIT) { - emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - - switch (Desc->getOpcode()) { - default: break; - case X86::MONITOR: - MCE.emitByte(0xC8); - break; - case X86::MWAIT: - MCE.emitByte(0xC9); - break; - } - } else { - emitRegModRMByte(MI.getOperand(CurOp++).getReg(), - (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - } + emitRegModRMByte(MI.getOperand(CurOp++).getReg(), + (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); if (CurOp == NumOps) break; const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) { emitConstant(MO1.getImm(), Size); break; @@ -798,7 +798,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRM6m: case X86II::MRM7m: { intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ? (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ? 
- X86InstrInfo::sizeOfImm(Desc) : 4) : 0; + X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0; MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m, @@ -809,7 +809,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, break; const MachineOperand &MO = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO.isImm()) { emitConstant(MO.getImm(), Size); break; @@ -839,6 +839,27 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; + + case X86II::MRM_C1: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC1); + break; + case X86II::MRM_C8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC8); + break; + case X86II::MRM_C9: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC9); + break; + case X86II::MRM_E8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xE8); + break; + case X86II::MRM_F0: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xF0); + break; } if (!Desc->isVariadic() && CurOp != NumOps) { @@ -850,256 +871,3 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, MCE.processDebugLoc(MI.getDebugLoc(), false); } - -// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter. -// -// FIXME: This is a total hack designed to allow work on llvm-mc to proceed -// without being blocked on various cleanups needed to support a clean interface -// to instruction encoding. -// -// Look away! 
- -#include "llvm/DerivedTypes.h" - -namespace { -class MCSingleInstructionCodeEmitter : public MachineCodeEmitter { - uint8_t Data[256]; - -public: - MCSingleInstructionCodeEmitter() { reset(); } - - void reset() { - BufferBegin = Data; - BufferEnd = array_endof(Data); - CurBufferPtr = Data; - } - - StringRef str() { - return StringRef(reinterpret_cast<char*>(BufferBegin), - CurBufferPtr - BufferBegin); - } - - virtual void startFunction(MachineFunction &F) {} - virtual bool finishFunction(MachineFunction &F) { return false; } - virtual void emitLabel(uint64_t LabelID) {} - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {} - virtual bool earlyResolveAddresses() const { return false; } - virtual void addRelocation(const MachineRelocation &MR) { } - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - return 0; - } - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - return 0; - } - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - return 0; - } - virtual uintptr_t getLabelAddress(uint64_t LabelID) const { - return 0; - } - virtual void setModuleInfo(MachineModuleInfo* Info) {} -}; - -class X86MCCodeEmitter : public MCCodeEmitter { - X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - -private: - X86TargetMachine &TM; - llvm::Function *DummyF; - TargetData *DummyTD; - mutable llvm::MachineFunction *DummyMF; - llvm::MachineBasicBlock *DummyMBB; - - MCSingleInstructionCodeEmitter *InstrEmitter; - Emitter<MachineCodeEmitter> *Emit; - -public: - X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) { - // Verily, thou shouldst avert thine eyes. 
- const llvm::FunctionType *FTy = - FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false); - DummyF = Function::Create(FTy, GlobalValue::InternalLinkage); - DummyTD = new TargetData(""); - DummyMF = new MachineFunction(DummyF, TM); - DummyMBB = DummyMF->CreateMachineBasicBlock(); - - InstrEmitter = new MCSingleInstructionCodeEmitter(); - Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter, - *TM.getInstrInfo(), - *DummyTD, false); - } - ~X86MCCodeEmitter() { - delete Emit; - delete InstrEmitter; - delete DummyMF; - delete DummyF; - } - - bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (!Op.isReg()) return false; - - Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false)); - return true; - } - - bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (Op.isImm()) { - Instr->addOperand(MachineOperand::CreateImm(Op.getImm())); - return true; - } - if (!Op.isExpr()) - return false; - - const MCExpr *Expr = Op.getExpr(); - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { - Instr->addOperand(MachineOperand::CreateImm(CE->getValue())); - return true; - } - - // FIXME: Relocation / fixup. 
- Instr->addOperand(MachineOperand::CreateImm(0)); - return true; - } - - bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3)); - } - - bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3) && - AddRegToInstr(MI, Instr, Start + 4)); - } - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS) const { - // Don't look yet! - - // Convert the MCInst to a MachineInstr so we can (ab)use the regular - // emitter. - const X86InstrInfo &II = *TM.getInstrInfo(); - const TargetInstrDesc &Desc = II.get(MI.getOpcode()); - MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc()); - DummyMBB->push_back(Instr); - - unsigned Opcode = MI.getOpcode(); - unsigned NumOps = MI.getNumOperands(); - unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) { - Instr->addOperand(MachineOperand::CreateReg(0, false)); - ++CurOp; - } else if (NumOps > 2 && - Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; - - bool OK = true; - switch (Desc.TSFlags & X86II::FormMask) { - case X86II::MRMDestReg: - case X86II::MRMSrcReg: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, 0); CurOp++; - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::RawFrm: - if (CurOp < NumOps) { - // Hack to make branches work. 
- if (!(Desc.TSFlags & X86II::ImmMask) && - MI.getOperand(0).isExpr() && - isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr())) - Instr->addOperand(MachineOperand::CreateMBB(DummyMBB)); - else - OK &= AddImmToInstr(MI, Instr, CurOp); - } - break; - - case X86II::AddRegFrm: - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0r: case X86II::MRM1r: - case X86II::MRM2r: case X86II::MRM3r: - case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, 0); CurOp++; - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMSrcMem: - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - OK &= AddLMemToInstr(MI, Instr, CurOp); - else - OK &= AddMemToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMDestMem: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - OK &= AddRegToInstr(MI, Instr, CurOp); - break; - - default: - case X86II::MRMInitReg: - case X86II::Pseudo: - OK = false; - break; - } - - if (!OK) { - dbgs() << "couldn't convert inst '"; - MI.dump(); - dbgs() << "' to machine instr:\n"; - Instr->dump(); - } - - InstrEmitter->reset(); - if (OK) - Emit->emitInstruction(*Instr, &Desc); - OS << InstrEmitter->str(); - - Instr->eraseFromParent(); - } -}; -} - -// Ok, now you can look. 
-MCCodeEmitter *llvm::createX86MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM)); -} diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index d5ad61b..69a9d60 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -786,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType()->isInteger(8) && - I->getOperand(0)->getType()->isInteger(1)) { + if (I->getType()->isIntegerTy(8) && + I->getOperand(0)->getType()->isIntegerTy(1)) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. @@ -828,30 +828,30 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::FCMP_UNE; // FALL THROUGH - case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break; - case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break; - case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break; - case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break; - case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; + case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_OGT: SwapArgs 
= false; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break; + case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break; + case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; + case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; - case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; - case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break; - case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break; - case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break; - case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break; + case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::ICMP_ULE: SwapArgs = false; 
BranchOpc = X86::JBE_4; break; + case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break; + case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break; + case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break; + case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break; default: return false; } @@ -869,7 +869,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (Predicate == CmpInst::FCMP_UNE) { // X86 requires a second branch to handle UNE (and OEQ, // which is mapped to UNE above). - BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB); } FastEmitBranch(FalseMBB); @@ -923,7 +923,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { unsigned OpCode = SetMI->getOpcode(); if (OpCode == X86::SETOr || OpCode == X86::SETBr) { - BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB)) + BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? + X86::JO_4 : X86::JB_4)) .addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); @@ -939,7 +940,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (OpReg == 0) return false; BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); - BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); return true; @@ -948,7 +949,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType()->isInteger(8)) { + if (I->getType()->isIntegerTy(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -957,7 +958,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType()->isInteger(16)) { + } else if 
(I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -966,7 +967,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType()->isInteger(32)) { + } else if (I->getType()->isIntegerTy(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -975,7 +976,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType()->isInteger(64)) { + } else if (I->getType()->isIntegerTy(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -1012,7 +1013,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what // we're doing here. if (CReg != X86::CL) - BuildMI(MBB, DL, TII.get(TargetInstrInfo::EXTRACT_SUBREG), X86::CL) + BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL) .addReg(CReg).addImm(X86::SUBREG_8BIT); unsigned ResultReg = createResultReg(RC); @@ -1153,6 +1154,17 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; + case Intrinsic::dbg_declare: { + DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); + X86AddressMode AM; + assert(DI->getAddress() && "Null address should be checked earlier!"); + if (!X86SelectAddress(DI->getAddress(), AM)) + return false; + const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). + addMetadata(DI->getVariable()); + return true; + } case Intrinsic::trap: { BuildMI(MBB, DL, TII.get(X86::TRAP)); return true; @@ -1236,7 +1248,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // fastcc with -tailcallopt is intended to provide a guaranteed // tail call optimization. 
Fastisel doesn't know how to do that. - if (CC == CallingConv::Fast && PerformTailCallOpt) + if (CC == CallingConv::Fast && GuaranteedTailCallOpt) return false; // Let SDISel handle vararg functions. diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h new file mode 100644 index 0000000..c8dac3c --- /dev/null +++ b/lib/Target/X86/X86FixupKinds.h @@ -0,0 +1,25 @@ +//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_X86_X86FIXUPKINDS_H +#define LLVM_X86_X86FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace X86 { +enum Fixups { + reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. + reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 + reloc_riprel_4byte // 32-bit rip-relative +}; +} +} + +#endif diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 503ac14..6d6fe77 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -235,7 +235,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { unsigned Flags = MI->getDesc().TSFlags; unsigned FPInstClass = Flags & X86II::FPTypeMask; - if (MI->getOpcode() == TargetInstrInfo::INLINEASM) + if (MI->isInlineAsm()) FPInstClass = X86II::SpecialFP; if (FPInstClass == X86II::NotFP) @@ -1083,7 +1083,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } } break; - case TargetInstrInfo::INLINEASM: { + case TargetOpcode::INLINEASM: { // The inline asm MachineInstr currently only *uses* FP registers for the // 'f' constraint. These should be turned into the current ST(x) register // in the machine instr. 
Also, any kills should be explicitly popped after diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 34a0045..6a117dd 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -118,7 +118,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { for (BasicBlock::const_iterator II = SI->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || - (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) || + (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) || (!Subtarget.hasSSE2() && PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { ContainsFPCode = true; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 91e0483..7b349f6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -183,8 +183,9 @@ namespace { virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); - virtual - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; + virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; + + virtual bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const; // Include the pieces autogenerated from the target description. #include "X86GenDAGISel.inc" @@ -303,11 +304,18 @@ namespace { } -bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +bool +X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; - if (U == Root) + if (!N.hasOneUse()) + return false; + + if (N.getOpcode() != ISD::LOAD) + return true; + + // If N is a load, do additional profitability checks. 
+ if (U == Root) { switch (U->getOpcode()) { default: break; case X86ISD::ADD: @@ -354,9 +362,17 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, } } } + } + + return true; +} + + +bool X86DAGToDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); + return SelectionDAGISel::IsLegalToFold(N, U, Root); } /// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand @@ -652,9 +668,10 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), - MemTmp, NULL, 0, MemVT); + MemTmp, NULL, 0, MemVT, + false, false, 0); SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. 
This will cause general havok on the dag because @@ -1310,8 +1327,8 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, InChain = N.getOperand(0).getValue(1); if (ISD::isNON_EXTLoad(InChain.getNode()) && InChain.getValue(0).hasOneUse() && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { + IsProfitableToFold(N, Pred.getNode(), Op) && + IsLegalToFold(N, Pred.getNode(), Op)) { LoadSDNode *LD = cast<LoadSDNode>(InChain); if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; @@ -1435,8 +1452,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Index, SDValue &Disp, SDValue &Segment) { if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) + IsProfitableToFold(N, P, P) && + IsLegalToFold(N, P, P)) return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); return false; } @@ -1606,7 +1623,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { } DebugLoc dl = Node->getDebugLoc(); - SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); @@ -1652,8 +1669,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: - case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: - case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: + case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: + case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: case X86::CMOVA16rr: case X86::CMOVA16rm: case X86::CMOVA32rr: case X86::CMOVA32rm: case 
X86::CMOVA64rr: case X86::CMOVA64rm: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 11e07df..9974d8c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12,9 +12,11 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" #include "X86ISelLowering.h" +#include "X86MCTargetExpr.h" #include "X86TargetMachine.h" #include "X86TargetObjectFile.h" #include "llvm/CallingConv.h" @@ -26,24 +28,30 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/VectorExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +STATISTIC(NumTailCalls, "Number of tail calls"); + static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); @@ -67,13 +75,14 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { return new X8664_MachoTargetObjectFile(); return new X8632_MachoTargetObjectFile(); 
case X86Subtarget::isELF: - return new TargetLoweringObjectFileELF(); + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return new X8664_ELFTargetObjectFile(TM); + return new X8632_ELFTargetObjectFile(TM); case X86Subtarget::isMingw: case X86Subtarget::isCygwin: case X86Subtarget::isWindows: return new TargetLoweringObjectFileCOFF(); } - } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) @@ -747,6 +756,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) { EVT VT = (MVT::SimpleValueType)i; @@ -987,19 +1002,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); - // Divide and reminder operations have no vector equivalent and can - // trap. Do a custom widening for these operations in which we never - // generate more divides/remainder than the original vector width. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - if (!isTypeLegal((MVT::SimpleValueType)VT)) { - setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom); - } - } - // FIXME: These should be based on subtarget info. Plus, the values should // be smaller when we are in optimizing for size mode. 
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores @@ -1084,12 +1086,46 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, return MVT::i32; } +/// getJumpTableEncoding - Return the entry encoding for a jump table in the +/// current function. The returned value is a member of the +/// MachineJumpTableInfo::JTEntryKind enum. +unsigned X86TargetLowering::getJumpTableEncoding() const { + // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF + // symbol. + if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()) + return MachineJumpTableInfo::EK_Custom32; + + // Otherwise, use the normal jump table encoding heuristics. + return TargetLowering::getJumpTableEncoding(); +} + +/// getPICBaseSymbol - Return the X86-32 PIC base. +MCSymbol * +X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF, + MCContext &Ctx) const { + const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo(); + return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + Twine(MF->getFunctionNumber())+"$pb"); +} + + +const MCExpr * +X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid,MCContext &Ctx) const{ + assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ && + Subtarget->isPICStyleGOT()); + // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF + // entries. + return X86MCTargetExpr::Create(MBB->getSymbol(Ctx), + X86MCTargetExpr::GOTOFF, Ctx); +} + /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC /// jumptable. SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, - SelectionDAG &DAG) const { - if (usesGlobalOffsetTable()) - return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + SelectionDAG &DAG) const { if (!Subtarget->is64Bit()) // This doesn't have DebugLoc associated with it, but is not really the // same as a Register. 
@@ -1098,6 +1134,20 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } +/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the +/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an +/// MCExpr. +const MCExpr *X86TargetLowering:: +getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, + MCContext &Ctx) const { + // X86-64 uses RIP relative addressing based on the jump table label. + if (Subtarget->isPICStyleRIPRel()) + return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); + + // Otherwise, the reference is relative to the PIC base. + return MCSymbolRefExpr::Create(getPICBaseSymbol(MF, Ctx), Ctx); +} + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; @@ -1131,13 +1181,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } + // Add the regs to the liveout set for the function. 
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) + MRI.addLiveOut(RVLocs[i].getLocReg()); SDValue Flag; @@ -1190,7 +1238,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64)); FuncInfo->setSRetReturnReg(Reg); } SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); @@ -1199,7 +1247,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); // RAX now acts like a return value. - MF.getRegInfo().addLiveOut(X86::RAX); + MRI.addLiveOut(X86::RAX); } RetOps[0] = Chain; // Update chain. @@ -1329,7 +1377,7 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){ case CallingConv::X86_FastCall: return !Subtarget->is64Bit(); case CallingConv::Fast: - return PerformTailCallOpt; + return GuaranteedTailCallOpt; } } @@ -1351,18 +1399,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { return CC_X86_32_C; } -/// NameDecorationForCallConv - Selects the appropriate decoration to -/// apply to a MachineFunction containing a given calling convention. -NameDecorationStyle -X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) { - if (CallConv == CallingConv::X86_FastCall) - return FastCall; - else if (CallConv == CallingConv::X86_StdCall) - return StdCall; - return None; -} - - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. 
The copy will be passed as a byval @@ -1376,6 +1412,12 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, /*AlwaysInline=*/true, NULL, 0, NULL, 0); } +/// FuncIsMadeTailCallSafe - Return true if the function is being made into +/// a tailcall target by changing its ABI. +static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) { + return GuaranteedTailCallOpt && CC == CallingConv::Fast; +} + SDValue X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, @@ -1384,10 +1426,9 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, const CCValAssign &VA, MachineFrameInfo *MFI, unsigned i) { - // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; - bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && PerformTailCallOpt; + bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; @@ -1402,13 +1443,18 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // changed with more analysis. // In case of tail call optimization mark all arguments mutable. Since they // could be overwritten by lowering of arguments in case of a tail call. 
- int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable, false); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - if (Flags.isByVal()) - return FIN; - return DAG.getLoad(ValVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + if (Flags.isByVal()) { + int FI = MFI->CreateFixedObject(Flags.getByValSize(), + VA.getLocMemOffset(), isImmutable, false); + return DAG.getFrameIndex(FI, getPointerTy()); + } else { + int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, + VA.getLocMemOffset(), isImmutable, false); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + return DAG.getLoad(ValVT, dl, Chain, FIN, + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); + } } SDValue @@ -1429,9 +1475,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, Fn->getName() == "main") FuncInfo->setForceFramePointer(true); - // Decorate the function name. - FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv)); - MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); @@ -1504,7 +1547,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If value is passed via pointer - do a load. if (VA.getLocInfo() == CCValAssign::Indirect) - ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0); + ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0, + false, false, 0); InVals.push_back(ArgValue); } @@ -1524,8 +1568,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } unsigned StackSize = CCInfo.getNextStackOffset(); - // align stack specially for tail calls - if (PerformTailCallOpt && CallConv == CallingConv::Fast) + // Align stack specially for tail calls. 
+ if (FuncIsMadeTailCallSafe(CallConv)) StackSize = GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for @@ -1599,7 +1643,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, PseudoSourceValue::getFixedStack(RegSaveFrameIndex), - Offset); + Offset, false, false, 0); MemOps.push_back(Store); Offset += 8; } @@ -1636,13 +1680,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Some CCs need callee pop. if (IsCalleePop(isVarArg, CallConv)) { BytesToPopOnReturn = StackSize; // Callee pops everything. - BytesCallerReserves = 0; } else { BytesToPopOnReturn = 0; // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins)) BytesToPopOnReturn = 4; - BytesCallerReserves = StackSize; } if (!Is64Bit) { @@ -1670,27 +1712,23 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call /// optimization is performed and it is required. SDValue X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, - SDValue &OutRetAddr, - SDValue Chain, - bool IsTailCall, - bool Is64Bit, - int FPDiff, - DebugLoc dl) { - if (!IsTailCall || FPDiff==0) return Chain; - + SDValue &OutRetAddr, SDValue Chain, + bool IsTailCall, bool Is64Bit, + int FPDiff, DebugLoc dl) { // Adjust the Return address stack slot. EVT VT = getPointerTy(); OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. 
- OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0); + OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0); return SDValue(OutRetAddr.getNode(), 1); } @@ -1705,31 +1743,42 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, - true, false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, true,false); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0); + PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0, + false, false, 0); return Chain; } SDValue X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, + bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) { - MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsStructRet = CallIsStructReturn(Outs); + bool IsSibcall = false; + + if (isTailCall) { + // Check if it's really possible to do a tail call. + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, + Outs, Ins, DAG); + + // Sibcalls are automatically detected tailcalls which do not require + // ABI changes. 
+ if (!GuaranteedTailCallOpt && isTailCall) + IsSibcall = true; + + if (isTailCall) + ++NumTailCalls; + } - assert((!isTailCall || - (CallConv == CallingConv::Fast && PerformTailCallOpt)) && - "IsEligibleForTailCallOptimization missed a case!"); assert(!(isVarArg && CallConv == CallingConv::Fast) && "Var args not supported with calling convention fastcc"); @@ -1741,11 +1790,15 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - if (PerformTailCallOpt && CallConv == CallingConv::Fast) + if (IsSibcall) + // This is a sibcall. The memory operands are available in caller's + // own caller's stack. + NumBytes = 0; + else if (GuaranteedTailCallOpt && CallConv == CallingConv::Fast) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; - if (isTailCall) { + if (isTailCall && !IsSibcall) { // Lower arguments at fp - stackoffset + fpdiff. unsigned NumBytesCallerPushed = MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn(); @@ -1757,12 +1810,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff); } - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + if (!IsSibcall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue RetAddrFrIdx; // Load return adress for tail calls. 
- Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit, - FPDiff, dl); + if (isTailCall && FPDiff) + Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, + Is64Bit, FPDiff, dl); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -1804,7 +1859,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); Arg = SpillSlot; break; } @@ -1812,15 +1868,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - if (!isTailCall || (isTailCall && isByVal)) { - assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); - - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, - dl, DAG, VA, Flags)); - } + } else if (!IsSibcall && (!isTailCall || isByVal)) { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, + dl, DAG, VA, Flags)); } } @@ -1840,7 +1893,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } - if (Subtarget->isPICStyleGOT()) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. @@ -1910,9 +1962,11 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, int FI = 0; // Do not flag preceeding copytoreg stuff together with the following stuff. 
InFlag = SDValue(); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - if (!VA.isRegLoc()) { + if (GuaranteedTailCallOpt) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) + continue; assert(VA.isMemLoc()); SDValue Arg = Outs[i].Val; ISD::ArgFlagsTy Flags = Outs[i].Flags; @@ -1937,7 +1991,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Store relative to framepointer. MemOpChains2.push_back( DAG.getStore(ArgChain, dl, Arg, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } } @@ -2020,21 +2075,22 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } if (isTailCall && !WasGlobalOrExternal) { - unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; - + // Force the address into a (call preserved) caller-saved register since + // tailcall must happen after callee-saved registers are poped. + // FIXME: Give it a special register class that contains caller-saved + // register instead? + unsigned TCReg = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, - DAG.getRegister(Opc, getPointerTy()), + DAG.getRegister(TCReg, getPointerTy()), Callee,InFlag); - Callee = DAG.getRegister(Opc, getPointerTy()); - // Add register as live out. - MF.getRegInfo().addLiveOut(Opc); + Callee = DAG.getRegister(TCReg, getPointerTy()); } // Returns a chain & a flag for retval copy to use. 
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); SmallVector<SDValue, 8> Ops; - if (isTailCall) { + if (!IsSibcall && isTailCall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); @@ -2095,7 +2151,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (IsCalleePop(isVarArg, CallConv)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet) - // If this is is a call to a struct-return function, the callee + // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. NumBytesForCalleeToPush = 4; @@ -2103,12 +2159,14 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, NumBytesForCalleeToPush = 0; // Callee pops nothing. // Returns a flag for retval copy to use. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(NumBytesForCalleeToPush, - true), - InFlag); - InFlag = Chain.getValue(1); + if (!IsSibcall) { + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(NumBytesForCalleeToPush, + true), + InFlag); + InFlag = Chain.getValue(1); + } // Handle result values, copying them out of physregs into vregs that we // return. @@ -2170,6 +2228,50 @@ unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, return Offset; } +/// MatchingStackOffset - Return true if the given stack call argument is +/// already available in the same position (relatively) of the caller's +/// incoming argument stack. 
+static +bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, + MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, + const X86InstrInfo *TII) { + int FI; + if (Arg.getOpcode() == ISD::CopyFromReg) { + unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); + if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) + return false; + MachineInstr *Def = MRI->getVRegDef(VR); + if (!Def) + return false; + if (!Flags.isByVal()) { + if (!TII->isLoadFromStackSlot(Def, FI)) + return false; + } else { + unsigned Opcode = Def->getOpcode(); + if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) && + Def->getOperand(1).isFI()) { + FI = Def->getOperand(1).getIndex(); + if (MFI->getObjectSize(FI) != Flags.getByValSize()) + return false; + } else + return false; + } + } else { + LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg); + if (!Ld) + return false; + SDValue Ptr = Ld->getBasePtr(); + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); + if (!FINode) + return false; + FI = FINode->getIndex(); + } + + if (!MFI->isFixedObjectIndex(FI)) + return false; + return Offset == MFI->getObjectOffset(FI); +} + /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. @@ -2177,23 +2279,79 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); - return CalleeCC == CallingConv::Fast && CallerCC == CalleeCC; + if (CalleeCC != CallingConv::Fast && + CalleeCC != CallingConv::C) + return false; + + // If -tailcallopt is specified, make fastcc functions tail-callable. 
+ const Function *CallerF = DAG.getMachineFunction().getFunction(); + if (GuaranteedTailCallOpt) { + if (CalleeCC == CallingConv::Fast && + CallerF->getCallingConv() == CalleeCC) + return true; + return false; + } + + // Look for obvious safe cases to perform tail call optimization that does not + // requite ABI changes. This is what gcc calls sibcall. + + // Do not tail call optimize vararg calls for now. + if (isVarArg) + return false; + + // If the callee takes no arguments then go on to check the results of the + // call. + if (!Outs.empty()) { + // Check if stack adjustment is needed. For now, do not do this if any + // argument is passed on the stack. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + if (CCInfo.getNextStackOffset()) { + MachineFunction &MF = DAG.getMachineFunction(); + if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) + return false; + if (Subtarget->isTargetWin64()) + // Win64 ABI has additional complications. + return false; + + // Check if the arguments are already laid out in the right way as + // the caller's fixed stack objects. 
+ MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const X86InstrInfo *TII = + ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + SDValue Arg = Outs[i].Val; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + if (!VA.isRegLoc()) { + if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, + MFI, MRI, TII)) + return false; + } + } + } + } + + return true; } FastISel * -X86TargetLowering::createFastISel(MachineFunction &mf, - MachineModuleInfo *mmo, - DwarfWriter *dw, - DenseMap<const Value *, unsigned> &vm, - DenseMap<const BasicBlock *, - MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am +X86TargetLowering::createFastISel(MachineFunction &mf, MachineModuleInfo *mmo, + DwarfWriter *dw, + DenseMap<const Value *, unsigned> &vm, + DenseMap<const BasicBlock*, MachineBasicBlock*> &bm, + DenseMap<const AllocaInst *, int> &am #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<Instruction*, 8> &cil #endif ) { return X86::createFastISel(mf, mmo, dw, vm, bm, am @@ -3413,7 +3571,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, int EltNo = (Offset - StartOffset) >> 2; int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; - SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0); + SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0, + false, false, 0); // Canonicalize it to a v4i32 shuffle. 
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -3686,6 +3845,33 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return SDValue(); } +SDValue +X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { + // We support concatenate two MMX registers and place them in a MMX + // register. This is better than doing a stack convert. + DebugLoc dl = Op.getDebugLoc(); + EVT ResVT = Op.getValueType(); + assert(Op.getNumOperands() == 2); + assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || + ResVT == MVT::v8i16 || ResVT == MVT::v16i8); + int Mask[2]; + SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0)); + SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); + InVec = Op.getOperand(1); + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + unsigned NumElts = ResVT.getVectorNumElements(); + VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp, + InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1)); + } else { + InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec); + SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); + Mask[0] = 0; Mask[1] = 2; + VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp); +} + // v8i16 shuffles - Prefer shuffles in the following order: // 1. [all] pshuflw, pshufhw, optional move // 2. [ssse3] 1 x pshufb @@ -4885,7 +5071,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. 
@@ -4965,7 +5151,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, MVT::i32)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, - NULL, 0); + NULL, 0, false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is @@ -4990,7 +5176,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. @@ -5107,7 +5293,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } @@ -5142,7 +5329,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, }; Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops)); Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); } return Result; @@ -5215,12 +5403,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, 
CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. @@ -5307,9 +5495,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0); + OffsetSlot, NULL, 0, false, false, 0); return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); } @@ -5357,7 +5545,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, dl, Value, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) @@ -5391,7 +5580,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { @@ -5401,7 +5590,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { // Load the result. 
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { @@ -5426,8 +5615,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -5453,8 +5642,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); if (VT.isVector()) { return DAG.getNode(ISD::BIT_CONVERT, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, @@ -5502,8 +5691,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); // Shift sign bit right or left if the two operands have different types. 
@@ -5531,8 +5720,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); // Or the value with the sign bit. @@ -5919,6 +6108,29 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { Cond = NewCond; } + // (select (x == 0), -1, 0) -> (sign_bit (x - 1)) + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + if (Cond.getOpcode() == X86ISD::SETCC && + cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue() == X86::COND_E) { + SDValue Cmp = Cond.getOperand(1); + if (Cmp.getOpcode() == X86ISD::CMP) { + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2); + ConstantSDNode *RHSC = + dyn_cast<ConstantSDNode>(Cmp.getOperand(1).getNode()); + if (N1C && N1C->isAllOnesValue() && + N2C && N2C->isNullValue() && + RHSC && RHSC->isNullValue()) { + SDValue CmpOp0 = Cmp.getOperand(0); + Cmp = DAG.getNode(X86ISD::CMP, dl, CmpOp0.getValueType(), + CmpOp0, DAG.getConstant(1, CmpOp0.getValueType())); + return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(), + DAG.getConstant(X86::COND_B, MVT::i8), Cmp); + } + } + } + // Look pass (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { @@ -5971,10 +6183,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { Cond = EmitTest(Cond, X86::COND_NE, DAG); } - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. 
- SDValue Ops[] = { Op.getOperand(2), Op.getOperand(1), CC, Cond }; + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); + SDValue Ops[] = { Op2, Op1, CC, Cond }; return DAG.getNode(X86ISD::CMOV, dl, VTs, Ops, array_lengthof(Ops)); } @@ -6417,7 +6629,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // __va_list_tag: @@ -6429,8 +6642,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsGPOffset, MVT::i32), - FIN, SV, 0); + DAG.getConstant(VarArgsGPOffset, MVT::i32), + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store fp_offset @@ -6438,21 +6651,23 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { FIN, DAG.getIntPtrConstant(4)); Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(VarArgsFPOffset, MVT::i32), - FIN, SV, 0); + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. 
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); @@ -6738,13 +6953,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -6756,7 +6971,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { unsigned FrameReg = Subtarget->is64Bit() ? 
X86::RBP : X86::EBP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -6780,7 +6996,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame, DAG.getIntPtrConstant(-TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); - Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); MF.getRegInfo().addLiveOut(StoreAddrReg); @@ -6799,16 +7015,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - const X86InstrInfo *TII = - ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); - if (Subtarget->is64Bit()) { SDValue OutChains[6]; // Large code-model. - - const unsigned char JMP64r = TII->getBaseOpcodeFor(X86::JMP64r); - const unsigned char MOV64ri = TII->getBaseOpcodeFor(X86::MOV64ri); + const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. + const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. 
const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10); const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11); @@ -6819,11 +7031,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 0); + Addr, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); - OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2); + OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, + false, false, 2); // Load the 'nest' parameter value into R10. // R10 is specified in X86CallingConv.td @@ -6831,24 +7044,25 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 10); + Addr, TrmpAddr, 10, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); - OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2); + OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, + false, false, 2); // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 20); + Addr, TrmpAddr, 20, false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr, - TrmpAddr, 22); + TrmpAddr, 22, false, false, 0); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; @@ -6903,25 +7117,28 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, DAG.getConstant(10, MVT::i32)); Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr); - const unsigned char MOV32ri = TII->getBaseOpcodeFor(X86::MOV32ri); + // This is storing the opcode for MOV32ri. + const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte. const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), - Trmp, TrmpAddr, 0); + Trmp, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); - OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1); + OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, + false, false, 1); - const unsigned char JMP = TII->getBaseOpcodeFor(X86::JMP); + const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. 
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr, - TrmpAddr, 5, false, 1); + TrmpAddr, 5, false, false, 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); - OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1); + OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, + false, false, 1); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; @@ -6964,7 +7181,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { DAG.getEntryNode(), StackSlot); // Load FP Control Word from stack slot - SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -7238,6 +7456,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); @@ -7327,7 +7546,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. 
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0, + false, false, 0)); } return; } @@ -7345,14 +7565,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(edx.getValue(1)); return; } - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: { - EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements())); - return; - } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); @@ -7551,7 +7763,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); @@ -7572,7 +7784,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit(); + return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { @@ -7728,7 +7940,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(EAXreg); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. 
return nextMBB; @@ -7885,7 +8097,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(X86::EDX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. return nextMBB; @@ -7988,7 +8200,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MIB.addReg(X86::EAX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8070,7 +8282,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( if (!Subtarget->isTargetWin64()) { // If %al is 0, branch around the XMM save block. BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); - BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB); + BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB); MBB->addSuccessor(EndMBB); } @@ -8556,10 +8768,11 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile()); + LD->isVolatile(), LD->isNonTemporal(), 0); return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else if (NumElems == 4 && LastLoadedElt == 1) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; @@ -9278,7 +9491,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getSrcValue(), Ld->getSrcValueOffset(), Ld->isVolatile(), - 
Ld->getAlignment()); + Ld->isNonTemporal(), Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { Ops.push_back(NewChain); @@ -9287,7 +9500,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, } return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); } // Otherwise, lower to two pairs of 32-bit loads / stores. @@ -9297,10 +9511,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getSrcValue(), Ld->getSrcValueOffset(), - Ld->isVolatile(), Ld->getAlignment()); + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, Ld->getSrcValue(), Ld->getSrcValueOffset()+4, - Ld->isVolatile(), + Ld->isVolatile(), Ld->isNonTemporal(), MinAlign(Ld->getAlignment(), 4)); SDValue NewChain = LoLd.getValue(1); @@ -9317,11 +9532,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, St->getSrcValue(), St->getSrcValueOffset() + 4, St->isVolatile(), + St->isNonTemporal(), MinAlign(St->getAlignment(), 4)); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } @@ -9504,7 +9721,7 @@ static bool LowerToBSwap(CallInst *CI) { // Verify this is a simple bswap. 
if (CI->getNumOperands() != 2 || CI->getType() != CI->getOperand(1)->getType() || - !CI->getType()->isInteger()) + !CI->getType()->isIntegerTy()) return false; const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); @@ -9553,7 +9770,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return LowerToBSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 - if (CI->getType()->isInteger(16) && + if (CI->getType()->isIntegerTy(16) && AsmPieces.size() == 3 && AsmPieces[0] == "rorw" && AsmPieces[1] == "$$8," && @@ -9563,7 +9780,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { } break; case 3: - if (CI->getType()->isInteger(64) && + if (CI->getType()->isIntegerTy(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 64bc70c..cf0eb40 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -19,6 +19,7 @@ #include "X86RegisterInfo.h" #include "X86MachineFunctionInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -156,6 +157,11 @@ namespace llvm { /// relative displacements. WrapperRIP, + /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector. + /// Can be used to move a vector value from a MMX register to a XMM + /// register. + MOVQ2DQ, + /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to /// i32, corresponds to X86::PEXTRB. PEXTRB, @@ -366,25 +372,33 @@ namespace llvm { unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset. unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset. int BytesToPopOnReturn; // Number of arg bytes ret should pop. - int BytesCallerReserves; // Number of arg bytes caller makes. 
public: explicit X86TargetLowering(X86TargetMachine &TM); + /// getPICBaseSymbol - Return the X86-32 PIC base. + MCSymbol *getPICBaseSymbol(const MachineFunction *MF, MCContext &Ctx) const; + + virtual unsigned getJumpTableEncoding() const; + + virtual const MCExpr * + LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, unsigned uid, + MCContext &Ctx) const; + /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC /// jumptable. - SDValue getPICJumpTableRelocBase(SDValue Table, - SelectionDAG &DAG) const; - + virtual SDValue getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const; + virtual const MCExpr * + getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI, MCContext &Ctx) const; + // Return the number of bytes that a function should pop when it returns (in // addition to the space used by the return address). // unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } - // Return the number of bytes that the caller reserves for arguments passed - // to this function. - unsigned getBytesCallerReserves() const { return BytesCallerReserves; } - /// getStackPtrReg - Return the stack pointer register we are using: either /// ESP or RSP. unsigned getStackPtrReg() const { return X86StackPtr; } @@ -532,16 +546,6 @@ namespace llvm { return !X86ScalarSSEf64 || VT == MVT::f80; } - /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. Targets which want to do tail call - /// optimization should implement this function. - virtual bool - IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - SelectionDAG& DAG) const; - virtual const X86Subtarget* getSubtarget() { return Subtarget; } @@ -619,13 +623,22 @@ namespace llvm { ISD::ArgFlagsTy Flags); // Call lowering helpers. 
+ + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv); SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, DebugLoc dl); CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; - NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv); unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, @@ -634,6 +647,7 @@ namespace llvm { SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, SelectionDAG &DAG); SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG); SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); @@ -693,7 +707,7 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals); virtual SDValue LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 9037ba6..4ea3739 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -187,7 +187,7 @@ def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset, 
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst), + def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; @@ -435,7 +435,7 @@ def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), // up to 64 bits. def def32 : PatLeaf<(i32 GR32:$src), [{ return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG && + N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && N->getOpcode() != ISD::CopyFromReg && N->getOpcode() != X86ISD::CMOV; }]>; @@ -893,35 +893,38 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), let isTwoAddress = 1 in { def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), "rcl{q}\t{1, $dst|$dst, 1}", []>; -def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -} def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + let Uses = [CL] in { +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64mCL : RI<0xD3, MRM3m, 
(outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; } -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), +} + +let isTwoAddress = 0 in { +def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +let Uses = [CL] in { +def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} } let isTwoAddress = 1 in { @@ -1466,9 +1469,13 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64] } // isTwoAddress // Use sbb to materialize carry flag into a GPR. +// FIXME: This are pseudo ops that should be replaced with Pat<> patterns. +// However, Pat<> can't replicate the destination reg into the inputs of the +// result. +// FIXME: Change this to have encoding Pseudo when X86MCCodeEmitter replaces +// X86CodeEmitter. let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in -def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), - "sbb{q}\t$dst, $dst", +def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), @@ -1606,8 +1613,7 @@ def SLDT64m : RI<0x00, MRM0m, (outs i16mem:$dst), (ins), // when we have a better way to specify isel priority. 
let Defs = [EFLAGS], AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), - "", +def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", [(set GR64:$dst, 0)]>; // Materialize i64 constant where top 32-bits are zero. This could theoretically @@ -1768,7 +1774,7 @@ def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; -def SWPGS : I<0x01, RawFrm, (outs), (ins), "swpgs", []>, TB; +def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t%fs", []>, TB; diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 71ec178..e22a903 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -339,7 +339,6 @@ def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{ll}\t$src">; def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{ll}\t$src">; -def FISTTP32m: FPI<0xDD, MRM1m, (outs i32mem:$dst), (ins), "fisttp{l}\t$dst">; def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">; def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a799f16..bb81cbf 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -29,7 +29,16 @@ def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>; def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>; def MRM6m : Format<30>; def MRM7m : Format<31>; def MRMInitReg : Format<32>; - +def MRM_C1 : Format<33>; +def MRM_C2 : Format<34>; +def MRM_C3 : Format<35>; +def 
MRM_C4 : Format<36>; +def MRM_C8 : Format<37>; +def MRM_C9 : Format<38>; +def MRM_E8 : Format<39>; +def MRM_F0 : Format<40>; +def MRM_F8 : Format<41>; +def MRM_F9 : Format<42>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -37,11 +46,13 @@ def MRMInitReg : Format<32>; class ImmType<bits<3> val> { bits<3> Value = val; } -def NoImm : ImmType<0>; -def Imm8 : ImmType<1>; -def Imm16 : ImmType<2>; -def Imm32 : ImmType<3>; -def Imm64 : ImmType<4>; +def NoImm : ImmType<0>; +def Imm8 : ImmType<1>; +def Imm8PCRel : ImmType<2>; +def Imm16 : ImmType<3>; +def Imm32 : ImmType<4>; +def Imm32PCRel : ImmType<5>; +def Imm64 : ImmType<6>; // FPFormat - This specifies what form this FP instruction has. This is used by // the Floating-Point stackifier pass. @@ -121,6 +132,12 @@ class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, let Pattern = pattern; let CodeSize = 3; } +class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm8PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { @@ -134,6 +151,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + // FPStack Instruction Templates: // FPI - Floating Point Instruction template. 
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td new file mode 100644 index 0000000..6b9478d --- /dev/null +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -0,0 +1,62 @@ +//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides pattern fragments useful for SIMD instructions. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MMX Pattern Fragments +//===----------------------------------------------------------------------===// + +def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>; + +def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>; +def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>; +def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>; +def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; + +//===----------------------------------------------------------------------===// +// MMX Masks +//===----------------------------------------------------------------------===// + +// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to +// PSHUFW imm. 
+def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShuffleSHUFImmediate(N)); +}]>; + +// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> +def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> +def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> +def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> +def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); +}], MMX_SHUFFLE_get_shuf_imm>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 3ae352c..a0d0312 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1060,8 +1060,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, const TargetRegisterInfo *TRI) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (I != MBB.end()) DL = I->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(I); if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { DestReg = TRI->getSubReg(DestReg, SubIdx); @@ -1588,44 +1587,44 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { 
switch (BrOpc) { default: return X86::COND_INVALID; - case X86::JE: return X86::COND_E; - case X86::JNE: return X86::COND_NE; - case X86::JL: return X86::COND_L; - case X86::JLE: return X86::COND_LE; - case X86::JG: return X86::COND_G; - case X86::JGE: return X86::COND_GE; - case X86::JB: return X86::COND_B; - case X86::JBE: return X86::COND_BE; - case X86::JA: return X86::COND_A; - case X86::JAE: return X86::COND_AE; - case X86::JS: return X86::COND_S; - case X86::JNS: return X86::COND_NS; - case X86::JP: return X86::COND_P; - case X86::JNP: return X86::COND_NP; - case X86::JO: return X86::COND_O; - case X86::JNO: return X86::COND_NO; + case X86::JE_4: return X86::COND_E; + case X86::JNE_4: return X86::COND_NE; + case X86::JL_4: return X86::COND_L; + case X86::JLE_4: return X86::COND_LE; + case X86::JG_4: return X86::COND_G; + case X86::JGE_4: return X86::COND_GE; + case X86::JB_4: return X86::COND_B; + case X86::JBE_4: return X86::COND_BE; + case X86::JA_4: return X86::COND_A; + case X86::JAE_4: return X86::COND_AE; + case X86::JS_4: return X86::COND_S; + case X86::JNS_4: return X86::COND_NS; + case X86::JP_4: return X86::COND_P; + case X86::JNP_4: return X86::COND_NP; + case X86::JO_4: return X86::COND_O; + case X86::JNO_4: return X86::COND_NO; } } unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { default: llvm_unreachable("Illegal condition code!"); - case X86::COND_E: return X86::JE; - case X86::COND_NE: return X86::JNE; - case X86::COND_L: return X86::JL; - case X86::COND_LE: return X86::JLE; - case X86::COND_G: return X86::JG; - case X86::COND_GE: return X86::JGE; - case X86::COND_B: return X86::JB; - case X86::COND_BE: return X86::JBE; - case X86::COND_A: return X86::JA; - case X86::COND_AE: return X86::JAE; - case X86::COND_S: return X86::JS; - case X86::COND_NS: return X86::JNS; - case X86::COND_P: return X86::JP; - case X86::COND_NP: return X86::JNP; - case X86::COND_O: return X86::JO; - case X86::COND_NO: return X86::JNO; + case 
X86::COND_E: return X86::JE_4; + case X86::COND_NE: return X86::JNE_4; + case X86::COND_L: return X86::JL_4; + case X86::COND_LE: return X86::JLE_4; + case X86::COND_G: return X86::JG_4; + case X86::COND_GE: return X86::JGE_4; + case X86::COND_B: return X86::JB_4; + case X86::COND_BE: return X86::JBE_4; + case X86::COND_A: return X86::JA_4; + case X86::COND_AE: return X86::JAE_4; + case X86::COND_S: return X86::JS_4; + case X86::COND_NS: return X86::JNS_4; + case X86::COND_P: return X86::JP_4; + case X86::COND_NP: return X86::JNP_4; + case X86::COND_O: return X86::JO_4; + case X86::COND_NO: return X86::JNO_4; } } @@ -1695,7 +1694,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; // Handle unconditional branches. - if (I->getOpcode() == X86::JMP) { + if (I->getOpcode() == X86::JMP_4) { if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; @@ -1779,7 +1778,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { while (I != MBB.begin()) { --I; - if (I->getOpcode() != X86::JMP && + if (I->getOpcode() != X86::JMP_4 && GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; // Remove the branch. @@ -1805,7 +1804,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); return 1; } @@ -1815,16 +1814,16 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. 
- BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); ++Count; break; default: { @@ -1835,7 +1834,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); ++Count; } return Count; @@ -1851,8 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(MI); // Determine if DstRC and SrcRC have a common superclass in common. const TargetRegisterClass *CommonRC = DestRC; @@ -2079,8 +2077,7 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) .addReg(SrcReg, getKillRegState(isKill)); } @@ -2173,8 +2170,7 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); + DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); } @@ -3018,22 +3014,11 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) 
const { RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); } -unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) { - switch (Desc->TSFlags & X86II::ImmMask) { - case X86II::Imm8: return 1; - case X86II::Imm16: return 2; - case X86II::Imm32: return 4; - case X86II::Imm64: return 8; - default: llvm_unreachable("Immediate size not set!"); - return 0; - } -} -/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register? -/// e.g. r8, xmm8, etc. -bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) { - if (!MO.isReg()) return false; - switch (MO.getReg()) { +/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or higher) +/// register? e.g. r8, xmm8, xmm13, etc. +bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { + switch (RegNo) { default: break; case X86::R8: case X86::R9: case X86::R10: case X86::R11: case X86::R12: case X86::R13: case X86::R14: case X86::R15: @@ -3387,24 +3372,24 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, switch (Opcode) { default: break; - case TargetInstrInfo::INLINEASM: { + case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI.getParent()->getParent(); const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MF->getTarget().getMCAsmInfo()); break; } - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: + case TargetOpcode::DBG_LABEL: + case TargetOpcode::EH_LABEL: break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::KILL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: case X86::FP_REG_KILL: break; case X86::MOVPC32r: { // This emits the "call" portion of this pseudo instruction. 
++FinalSize; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); break; } } @@ -3422,7 +3407,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } else if (MO.isSymbol()) { FinalSize += sizeExternalSymbolAddress(false); } else if (MO.isImm()) { - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } else { llvm_unreachable("Unknown RawFrm operand!"); } @@ -3435,7 +3420,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, if (CurOp != NumOps) { const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) FinalSize += sizeConstant(Size); else { @@ -3460,7 +3445,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; } @@ -3470,7 +3455,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += X86AddrNumOperands + 1; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; } @@ -3481,7 +3466,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += 2; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; @@ -3498,7 +3483,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, CurOp += AddrOperands + 1; if (CurOp != NumOps) { ++CurOp; - FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); + FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); } break; } @@ -3523,7 +3508,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, if 
(CurOp != NumOps) { const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO1.isImm()) FinalSize += sizeConstant(Size); else { @@ -3553,7 +3538,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, if (CurOp != NumOps) { const MachineOperand &MO = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); if (MO.isImm()) FinalSize += sizeConstant(Size); else { @@ -3571,6 +3556,14 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } } break; + + case X86II::MRM_C1: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + FinalSize += 2; + break; } case X86II::MRMInitReg: @@ -3619,8 +3612,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc(); + DebugLoc DL = FirstMBB.findDebugLoc(MBBI); MachineRegisterInfo &RegInfo = MF->getRegInfo(); unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 4f35d0d..5111719 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -18,7 +18,6 @@ #include "X86.h" #include "X86RegisterInfo.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class X86RegisterInfo; @@ -269,6 +268,18 @@ namespace X86II { // MRMInitReg - This form is used for instructions whose source and // destinations are the same register. MRMInitReg = 32, + + //// MRM_C1 - A mod/rm byte of exactly 0xC1. 
+ MRM_C1 = 33, + MRM_C2 = 34, + MRM_C3 = 35, + MRM_C4 = 36, + MRM_C8 = 37, + MRM_C9 = 38, + MRM_E8 = 39, + MRM_F0 = 40, + MRM_F8 = 41, + MRM_F9 = 42, FormMask = 63, @@ -332,11 +343,13 @@ namespace X86II { // This three-bit field describes the size of an immediate operand. Zero is // unused so that we can tell if we forgot to set a value. ImmShift = 13, - ImmMask = 7 << ImmShift, - Imm8 = 1 << ImmShift, - Imm16 = 2 << ImmShift, - Imm32 = 3 << ImmShift, - Imm64 = 4 << ImmShift, + ImmMask = 7 << ImmShift, + Imm8 = 1 << ImmShift, + Imm8PCRel = 2 << ImmShift, + Imm16 = 3 << ImmShift, + Imm32 = 4 << ImmShift, + Imm32PCRel = 5 << ImmShift, + Imm64 = 6 << ImmShift, //===------------------------------------------------------------------===// // FP Instruction Classification... Zero is non-fp instruction. @@ -389,6 +402,47 @@ namespace X86II { OpcodeShift = 24, OpcodeMask = 0xFF << OpcodeShift }; + + // getBaseOpcodeFor - This function returns the "base" X86 opcode for the + // specified machine instruction. + // + static inline unsigned char getBaseOpcodeFor(unsigned TSFlags) { + return TSFlags >> X86II::OpcodeShift; + } + + static inline bool hasImm(unsigned TSFlags) { + return (TSFlags & X86II::ImmMask) != 0; + } + + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field + /// of the specified instruction. + static inline unsigned getSizeOfImm(unsigned TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8: + case X86II::Imm8PCRel: return 1; + case X86II::Imm16: return 2; + case X86II::Imm32: + case X86II::Imm32PCRel: return 4; + case X86II::Imm64: return 8; + } + } + + /// isImmPCRel - Return true if the immediate of the specified instruction's + /// TSFlags indicates that it is pc relative. 
+ static inline unsigned isImmPCRel(unsigned TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; + } + } } const int X86AddrNumOperands = 5; @@ -637,25 +691,21 @@ public: /// instruction that defines the specified register class. bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; - // getBaseOpcodeFor - This function returns the "base" X86 opcode for the - // specified machine instruction. - // - unsigned char getBaseOpcodeFor(const TargetInstrDesc *TID) const { - return TID->TSFlags >> X86II::OpcodeShift; - } - unsigned char getBaseOpcodeFor(unsigned Opcode) const { - return getBaseOpcodeFor(&get(Opcode)); - } - static bool isX86_64NonExtLowByteReg(unsigned reg) { return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL || reg == X86::DIL); } - static unsigned sizeOfImm(const TargetInstrDesc *Desc); - static bool isX86_64ExtendedReg(const MachineOperand &MO); + static bool isX86_64ExtendedReg(const MachineOperand &MO) { + if (!MO.isReg()) return false; + return isX86_64ExtendedReg(MO.getReg()); + } static unsigned determineREX(const MachineInstr &MI); + /// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or + /// higher) register? e.g. r8, xmm8, xmm13, etc. + static bool isX86_64ExtendedReg(unsigned RegNo); + /// GetInstSize - Returns the size of the specified MachineInstr. /// virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 396cb53..25cd297 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -182,10 +182,6 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; // X86 Operand Definitions. 
// -def i32imm_pcrel : Operand<i32> { - let PrintMethod = "print_pcrel_imm"; -} - // A version of ptr_rc which excludes SP, ESP, and RSP. This is used for // the index operand of an address, to conform to x86 encoding restrictions. def ptr_rc_nosp : PointerLikeRegClass<1>; @@ -196,6 +192,14 @@ def X86MemAsmOperand : AsmOperandClass { let Name = "Mem"; let SuperClass = ?; } +def X86AbsMemAsmOperand : AsmOperandClass { + let Name = "AbsMem"; + let SuperClass = X86MemAsmOperand; +} +def X86NoSegMemAsmOperand : AsmOperandClass { + let Name = "NoSegMem"; + let SuperClass = X86MemAsmOperand; +} class X86MemOperand<string printMethod> : Operand<iPTR> { let PrintMethod = printMethod; let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); @@ -207,11 +211,6 @@ def opaque48mem : X86MemOperand<"printopaquemem">; def opaque80mem : X86MemOperand<"printopaquemem">; def opaque512mem : X86MemOperand<"printopaquemem">; -def offset8 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } -def offset16 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } -def offset32 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } -def offset64 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; } - def i8mem : X86MemOperand<"printi8mem">; def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; @@ -235,7 +234,22 @@ def i8mem_NOREX : Operand<i64> { def lea32mem : Operand<i32> { let PrintMethod = "printlea32mem"; let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); - let ParserMatchClass = X86MemAsmOperand; + let ParserMatchClass = X86NoSegMemAsmOperand; +} + +let ParserMatchClass = X86AbsMemAsmOperand, + PrintMethod = "print_pcrel_imm" in { +def i32imm_pcrel : Operand<i32>; + +def offset8 : Operand<i64>; +def offset16 : Operand<i64>; +def offset32 : Operand<i64>; +def offset64 : Operand<i64>; + +// Branch targets have OtherVT type and print as pc-relative values. 
+def brtarget : Operand<OtherVT>; +def brtarget8 : Operand<OtherVT>; + } def SSECC : Operand<i8> { @@ -257,15 +271,6 @@ def i32i8imm : Operand<i32> { let ParserMatchClass = ImmSExt8AsmOperand; } -// Branch targets have OtherVT type and print as pc-relative values. -def brtarget : Operand<OtherVT> { - let PrintMethod = "print_pcrel_imm"; -} - -def brtarget8 : Operand<OtherVT> { - let PrintMethod = "print_pcrel_imm"; -} - //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. // @@ -591,7 +596,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in "", []>; //===----------------------------------------------------------------------===// -// Control Flow Instructions... +// Control Flow Instructions. // // Return instructions. @@ -609,16 +614,46 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, "lret\t$amt", []>; } -// All branches are RawFrm, Void, Branch, and Terminators -let isBranch = 1, isTerminator = 1 in - class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> : - I<opcode, RawFrm, (outs), ins, asm, pattern>; +// Unconditional branches. +let isBarrier = 1, isBranch = 1, isTerminator = 1 in { + def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp\t$dst", [(br bb:$dst)]>; + def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst", []>; +} -let isBranch = 1, isBarrier = 1 in { - def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; - def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>; +// Conditional Branches. 
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { + multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, + [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + } } +defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; +defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; +defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; +defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; +defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; +defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; +defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; +defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; +defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; +defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; +defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; +defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; +defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; +defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; +defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; +defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; + +// FIXME: What about the CX/RCX versions of this instruction? 
+let Uses = [ECX], isBranch = 1, isTerminator = 1 in + def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jcxz\t$dst", []>; + + // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", @@ -639,63 +674,6 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { "ljmp{l}\t{*}$dst", []>; } -// Conditional branches -let Uses = [EFLAGS] in { -// Short conditional jumps -def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>; -def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>; -def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>; -def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>; -def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>; -def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>; -def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>; -def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>; -def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>; -def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>; -def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>; -def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>; -def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>; -def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>; -def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>; -def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>; - -def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>; - -def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst", - [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB; -def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst", - [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB; -def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst", - [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB; -def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst", - [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB; -def JG : 
IBr<0x8F, (ins brtarget:$dst), "jg\t$dst", - [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB; -def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst", - [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB; - -def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst", - [(X86brcond bb:$dst, X86_COND_B, EFLAGS)]>, TB; -def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst", - [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB; -def JA : IBr<0x87, (ins brtarget:$dst), "ja\t$dst", - [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB; -def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst", - [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB; - -def JS : IBr<0x88, (ins brtarget:$dst), "js\t$dst", - [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB; -def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst", - [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB; -def JP : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst", - [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB; -def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst", - [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB; -def JO : IBr<0x80, (ins brtarget:$dst), "jo\t$dst", - [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB; -def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst", - [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB; -} // Uses = [EFLAGS] // Loop instructions @@ -716,7 +694,7 @@ let isCall = 1 in XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP] in { - def CALLpcrel32 : Ii32<0xE8, RawFrm, + def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i32imm_pcrel:$dst,variable_ops), "call\t$dst", []>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), @@ -756,15 +734,18 @@ def TCRETURNri : I<0, Pseudo, (outs), "#TC_RETURN $dst $offset", []>; -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL", +// FIXME: The should be pseudo instructions that are lowered when going to +// mcinst. 
+let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops), + "jmp\t$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), + def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), + def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst, variable_ops), "jmp\t{*}$dst # TAILCALL", []>; //===----------------------------------------------------------------------===// @@ -877,7 +858,7 @@ def LEA32r : I<0x8D, MRMSrcMem, "lea{l}\t{$src|$dst}, {$dst|$src}", [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; -let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI] in { +let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)]>, REP; def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", @@ -886,16 +867,31 @@ def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", [(X86rep_movs i32)]>, REP; } -let Defs = [ECX,EDI], Uses = [AL,ECX,EDI] in +// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI +let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { +def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>; +def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize; +def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>; +} + +let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", [(X86rep_stos i8)]>, REP; -let Defs = [ECX,EDI], Uses = [AX,ECX,EDI] in +let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), 
"{rep;stosw|rep stosw}", [(X86rep_stos i16)]>, REP, OpSize; -let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in +let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)]>, REP; +// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI +let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in +def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>; +let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in +def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize; +let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in +def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>; + def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>; def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize; def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>; @@ -908,6 +904,9 @@ let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; +let Defs = [RAX, RCX, RDX] in +def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; + let isBarrier = 1, hasCtrlDep = 1 in { def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } @@ -996,6 +995,7 @@ def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, imm:$src)]>; } + def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)]>; @@ -2306,98 +2306,100 @@ let isTwoAddress = 0 in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{1, $dst|$dst, 1}", []>; -def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{%cl, $dst|$dst, CL}", []>; -def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; } def RCL8ri : Ii8<0xC0, MRM2r, 
(outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{1, $dst|$dst, 1}", []>; -def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{%cl, $dst|$dst, CL}", []>; -def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; } def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{1, $dst|$dst, 1}", []>; -def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{%cl, $dst|$dst, CL}", []>; -def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins 
i8mem:$src), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; } def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCR16mCL : I<0xD3, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{1, $dst|$dst, 1}", []>; -def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; -def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), + +let isTwoAddress = 0 in { +def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def 
RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{1, $dst|$dst, 1}", []>; +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in { +def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +} +} + // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), @@ -3006,8 +3008,8 @@ let isTwoAddress = 0 in { def SBB32mr : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), "sbb{l}\t{$src2, $dst|$dst, $src2}", [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>; - def SBB8mi : Ii32<0x80, MRM3m, (outs), (ins i8mem:$dst, 
i8imm:$src2), - "sbb{b}\t{$src2, $dst|$dst, $src2}", + def SBB8mi : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), + "sbb{b}\t{$src2, $dst|$dst, $src2}", [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>; def SBB16mi : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2), "sbb{w}\t{$src2, $dst|$dst, $src2}", @@ -3234,17 +3236,18 @@ def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags let Uses = [EFLAGS] in { // Use sbb to materialize carry bit. - let Defs = [EFLAGS], isCodeGenOnly = 1 in { -def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), - "sbb{b}\t$dst, $dst", +// FIXME: These are pseudo ops that should be replaced with Pat<> patterns. +// However, Pat<> can't replicate the destination reg into the inputs of the +// result. +// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces +// X86CodeEmitter. +def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "", [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; -def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), - "sbb{w}\t$dst, $dst", +def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "", [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>, OpSize; -def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), - "sbb{l}\t$dst, $dst", +def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "", [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; } // isCodeGenOnly @@ -3681,7 +3684,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB; -// These are the same as the regular regular MOVZX32rr8 and MOVZX32rm8 +// These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class // instead of GR32. This allows them to operate on h registers on x86-64. 
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, @@ -3716,10 +3719,10 @@ let neverHasSideEffects = 1 in { // Alias instructions that map movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. +// FIXME: Set encoding to pseudo. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { -def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), - "xor{b}\t$dst, $dst", +def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", [(set GR8:$dst, 0)]>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller @@ -3731,8 +3734,8 @@ def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", [(set GR16:$dst, 0)]>, OpSize; -def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), - "xor{l}\t$dst, $dst", +// FIXME: Set encoding to pseudo. +def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)]>; } @@ -4077,7 +4080,7 @@ def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; -def INVLPG : I<0x01, RawFrm, (outs), (ins), "invlpg", []>, TB; +def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), "str{w}\t{$dst}", []>, TB; @@ -4155,6 +4158,26 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), "lldt{w}\t$src", []>, TB; +// Lock instruction prefix +def LOCK_PREFIX : I<0xF0, RawFrm, (outs), (ins), "lock", []>; + +// Repeat string operation instruction prefixes +// These uses the DF flag in the EFLAGS register to inc or dec ECX +let Defs = [ECX], Uses = [ECX,EFLAGS] in { +// Repeat (used with INS, OUTS, MOVS, LODS and STOS) +def REP_PREFIX : I<0xF3, RawFrm, (outs), (ins), "rep", []>; +// Repeat while not equal (used with CMPS and SCAS) +def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; +} + +// Segment 
override instruction prefixes +def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>; +def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>; +def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>; +def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>; +def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>; +def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; + // String manipulation instructions def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>; @@ -4219,17 +4242,17 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; // VMX instructions // 66 0F 38 80 -def INVEPT : I<0x38, RawFrm, (outs), (ins), "invept", []>, OpSize, TB; +def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; // 66 0F 38 81 -def INVVPID : I<0x38, RawFrm, (outs), (ins), "invvpid", []>, OpSize, TB; +def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; // 0F 01 C1 -def VMCALL : I<0x01, RawFrm, (outs), (ins), "vmcall", []>, TB; +def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmclear\t$vmcs", []>, OpSize, TB; // 0F 01 C2 -def VMLAUNCH : I<0x01, RawFrm, (outs), (ins), "vmlaunch", []>, TB; +def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; // 0F 01 C3 -def VMRESUME : I<0x01, RawFrm, (outs), (ins), "vmresume", []>, TB; +def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmptrld\t$vmcs", []>, TB; def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), @@ -4251,7 +4274,7 @@ def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; // 0F 01 C4 -def VMXOFF : I<0x01, RawFrm, (outs), (ins), "vmxoff", []>, OpSize; +def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), 
"vmxon\t{$vmxon}", []>, XD; @@ -5181,6 +5204,12 @@ include "X86InstrFPStack.td" include "X86Instr64bit.td" //===----------------------------------------------------------------------===// +// SIMD support (SSE, MMX and AVX) +//===----------------------------------------------------------------------===// + +include "X86InstrFragmentsSIMD.td" + +//===----------------------------------------------------------------------===// // XMM Floating point support (requires SSE / SSE2) //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index fc40c9a..89f020c 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -14,56 +14,6 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// MMX Pattern Fragments -//===----------------------------------------------------------------------===// - -def load_mmx : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>; - -def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>; -def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>; -def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>; -def bc_v1i64 : PatFrag<(ops node:$in), (v1i64 (bitconvert node:$in))>; - -//===----------------------------------------------------------------------===// -// MMX Masks -//===----------------------------------------------------------------------===// - -// MMX_SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to -// PSHUFW imm. 
-def MMX_SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShuffleSHUFImmediate(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <2, 6, 3, 7, ...> -def mmx_unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, v2, <0, 4, 2, 5, ...> -def mmx_unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <0, 0, 1, 1, ...> -def mmx_unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -// Patterns for: vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> -def mmx_unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); -}], MMX_SHUFFLE_get_shuf_imm>; - -//===----------------------------------------------------------------------===// // MMX Multiclasses //===----------------------------------------------------------------------===// @@ -501,6 +451,20 @@ let Constraints = "$src1 = $dst" in { (iPTR imm:$src3))))]>; } +// MMX to XMM for vector types +def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, + [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>; + +def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), + (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; + +def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))), + (v2i64 (MOVQI2PQIrm addr:$src))>; + +def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert + (v2i32 (scalar_to_vector (loadi32 addr:$src))))))), + (v2i64 (MOVDI2PDIrm addr:$src))>; + // Mask 
creation def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -522,11 +486,10 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), // Alias instructions that map zero vector to pxor. let isReMaterializable = 1, isCodeGenOnly = 1 in { - def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), - "pxor\t$dst, $dst", + // FIXME: Change encoding to pseudo. + def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "", [(set VR64:$dst, (v2i32 immAllZerosV))]>; - def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), - "pcmpeqd\t$dst, $dst", + def MMX_V_SETALLONES : MMXI<0x76, MRMInitReg, (outs VR64:$dst), (ins), "", [(set VR64:$dst, (v2i32 immAllOnesV))]>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 94b9b55..9b2140f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -505,9 +505,10 @@ def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), // Alias instructions that map fld0 to pxor for sse. let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, canFoldAsLoad = 1 in -def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), - "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; + // FIXME: Set encoding to pseudo! +def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, + Requires<[HasSSE1]>, TB, OpSize; // Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are // disregarded. 
@@ -761,6 +762,9 @@ let Constraints = "$src1 = $dst" in { } // Constraints = "$src1 = $dst" +def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1025,10 +1029,10 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), // Alias instructions that map zero vector to pxor / xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. +// FIXME: Change encoding to pseudo! let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1 in -def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), - "xorps\t$dst, $dst", +def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllZerosV))]>; let Predicates = [HasSSE1] in { @@ -1269,8 +1273,8 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), // Alias instructions that map fld0 to pxor for sse. let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, canFoldAsLoad = 1 in -def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), - "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>, +def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>, TB, OpSize; // Alias instruction to do FR64 reg-to-reg copy using movapd. 
Upper bits are @@ -2311,9 +2315,9 @@ def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), TB, Requires<[HasSSE2]>; // Load, store, and memory fence -def LFENCE : I<0xAE, MRM5r, (outs), (ins), +def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM6r, (outs), (ins), +def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; //TODO: custom lower this so as to never even generate the noop @@ -2329,8 +2333,8 @@ def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isCodeGenOnly = 1 in - def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), - "pcmpeqd\t$dst, $dst", + // FIXME: Change encoding to pseudo. + def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; // FR64 to 128-bit vector conversion. 
@@ -2612,9 +2616,9 @@ let Constraints = "$src1 = $dst" in { } // Thread synchronization -def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor", +def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait", +def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; // vector_shuffle v1, <undef> <1, 1, 3, 3> @@ -3746,7 +3750,8 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; + [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, + OpSize; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index f363903..d297d24 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -297,6 +297,7 @@ extern "C" { push edx push ecx and esp, -16 + sub esp, 16 mov eax, dword ptr [ebp+4] mov dword ptr [esp+4], eax mov dword ptr [esp], ebp diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index 1738d49..91c0fbb 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -55,9 +55,6 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { if (!is64Bit) Data64bitsDirective = 0; // we can't emit a 64-bit unit - // Leopard and above support aligned common symbols. 
- COMMDirectiveTakesAlignment = Triple.getDarwinMajorNumber() >= 9; - CommentString = "##"; PCSymbol = "."; @@ -75,7 +72,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) { PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; - SetDirective = "\t.set\t"; PCSymbol = "."; // Set up DWARF directives @@ -98,27 +94,4 @@ MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; -} - - -X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) { - AsmTransCBE = x86_asm_table; - AssemblerDialect = AsmWriterFlavor; - - GlobalPrefix = "_"; - CommentString = ";"; - - PrivateGlobalPrefix = "$"; - AlignDirective = "\tALIGN\t"; - ZeroDirective = "\tdb\t"; - AsciiDirective = "\tdb\t"; - AscizDirective = 0; - Data8bitsDirective = "\tdb\t"; - Data16bitsDirective = "\tdw\t"; - Data32bitsDirective = "\tdd\t"; - Data64bitsDirective = "\tdq\t"; - HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - - AlignmentIsInBytes = true; -} +}
\ No newline at end of file diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index ca227b7..69716bf 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -33,11 +33,6 @@ namespace llvm { struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { explicit X86MCAsmInfoCOFF(const Triple &Triple); }; - - struct X86WinMCAsmInfo : public MCAsmInfo { - explicit X86WinMCAsmInfo(const Triple &Triple); - }; - } // namespace llvm #endif diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp new file mode 100644 index 0000000..3f18696 --- /dev/null +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -0,0 +1,645 @@ +//===-- X86/X86MCCodeEmitter.cpp - Convert X86 code to machine code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the X86MCCodeEmitter class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-emitter" +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86FixupKinds.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class X86MCCodeEmitter : public MCCodeEmitter { + X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + const TargetMachine &TM; + const TargetInstrInfo &TII; + MCContext &Ctx; + bool Is64BitMode; +public: + X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) + : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { + Is64BitMode = is64Bit; + } + + ~X86MCCodeEmitter() {} + + unsigned getNumFixupKinds() const { + return 3; + } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[] = { + { "reloc_pcrel_4byte", 0, 4 * 8 }, + { "reloc_pcrel_1byte", 0, 1 * 8 }, + { "reloc_riprel_4byte", 0, 4 * 8 } + }; + + if (Kind < FirstTargetFixupKind) + return MCCodeEmitter::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + static unsigned GetX86RegNum(const MCOperand &MO) { + return X86RegisterInfo::getX86RegNum(MO.getReg()); + } + + void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const { + OS << (char)C; + ++CurByte; + } + + void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, + raw_ostream &OS) const { + // Output the constant in little endian byte order. 
+ for (unsigned i = 0; i != Size; ++i) { + EmitByte(Val & 255, CurByte, OS); + Val >>= 8; + } + } + + void EmitImmediate(const MCOperand &Disp, + unsigned ImmSize, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + int ImmOffset = 0) const; + + inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, + unsigned RM) { + assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); + return RM | (RegOpcode << 3) | (Mod << 6); + } + + void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld, + unsigned &CurByte, raw_ostream &OS) const { + EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS); + } + + void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base, + unsigned &CurByte, raw_ostream &OS) const { + // SIB byte is in the same format as the ModRMByte. + EmitByte(ModRMByte(SS, Index, Base), CurByte, OS); + } + + + void EmitMemModRMByte(const MCInst &MI, unsigned Op, + unsigned RegOpcodeField, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const; + +}; + +} // end anonymous namespace + + +MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &, + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, false); +} + +MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, true); +} + + +/// isDisp8 - Return true if this signed displacement fits in a 8-bit +/// sign-extended field. +static bool isDisp8(int Value) { + return Value == (signed char)Value; +} + +/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate +/// in an instruction with the specified TSFlags. 
+static MCFixupKind getImmFixupKind(unsigned TSFlags) { + unsigned Size = X86II::getSizeOfImm(TSFlags); + bool isPCRel = X86II::isImmPCRel(TSFlags); + + switch (Size) { + default: assert(0 && "Unknown immediate size"); + case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1; + case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4; + case 2: assert(!isPCRel); return FK_Data_2; + case 8: assert(!isPCRel); return FK_Data_8; + } +} + + +void X86MCCodeEmitter:: +EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { + // If this is a simple integer displacement that doesn't require a relocation, + // emit it now. + if (DispOp.isImm()) { + // FIXME: is this right for pc-rel encoding?? Probably need to emit this as + // a fixup if so. + EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); + return; + } + + // If we have an immoffset, add it to the expression. + const MCExpr *Expr = DispOp.getExpr(); + + // If the fixup is pc-relative, we need to bias the value to be relative to + // the start of the field, not the end of the field. + if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || + FixupKind == MCFixupKind(X86::reloc_riprel_4byte)) + ImmOffset -= 4; + if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) + ImmOffset -= 1; + + if (ImmOffset) + Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), + Ctx); + + // Emit a symbolic constant as a fixup and 4 zeros. 
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); + EmitConstant(0, Size, CurByte, OS); +} + + +void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, + unsigned RegOpcodeField, + unsigned TSFlags, unsigned &CurByte, + raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const{ + const MCOperand &Disp = MI.getOperand(Op+3); + const MCOperand &Base = MI.getOperand(Op); + const MCOperand &Scale = MI.getOperand(Op+1); + const MCOperand &IndexReg = MI.getOperand(Op+2); + unsigned BaseReg = Base.getReg(); + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + + // rip-relative addressing is actually relative to the *next* instruction. + // Since an immediate can follow the mod/rm byte for an instruction, this + // means that we need to bias the immediate field of the instruction with + // the size of the immediate field. If we have this case, add it into the + // expression to emit. + int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; + + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte), + CurByte, OS, Fixups, -ImmSize); + return; + } + + unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U; + + // Determine whether a SIB byte is needed. + // If no BaseReg, issue a RIP relative instruction only if the MCE can + // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table + // 2-7) and absolute references. + + if (// The SIB byte must be used if there is an index register. + IndexReg.getReg() == 0 && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. 
+ BaseRegNo != N86::ESP && + // If there is no base register and we're in 64-bit mode, we need a SIB + // byte to emit an addr that is just 'disp32' (the non-RIP relative form). + (!Is64BitMode || BaseReg != 0)) { + + if (BaseReg == 0) { // [disp32] in X86-32 mode + EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + return; + } + + // If the base is not EBP/ESP and there is no displacement, use simple + // indirect register encoding, this handles addresses like [EAX]. The + // encoding for [EBP] with no displacement means [disp32] so we handle it + // by emitting a displacement of 0 below. + if (Disp.isImm() && Disp.getImm() == 0 && BaseRegNo != N86::EBP) { + EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS); + return; + } + + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. + if (Disp.isImm() && isDisp8(Disp.getImm())) { + EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); + return; + } + + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] + EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); + return; + } + + // We need a SIB byte, so start by outputting the ModR/M byte first + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + bool ForceDisp8 = false; + if (BaseReg == 0) { + // If there is no base register, we emit the special case SIB byte with + // MOD=0, BASE=5, to JUST get the index, scale, and displacement. + EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS); + ForceDisp32 = true; + } else if (!Disp.isImm()) { + // Emit the normal disp32 encoding. 
+ EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); + ForceDisp32 = true; + } else if (Disp.getImm() == 0 && BaseReg != X86::EBP) { + // Emit no displacement ModR/M byte + EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS); + } else if (isDisp8(Disp.getImm())) { + // Emit the disp8 encoding. + EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS); + ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + } else { + // Emit the normal disp32 encoding. + EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); + } + + // Calculate what the SS field value should be... + static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + unsigned SS = SSTable[Scale.getImm()]; + + if (BaseReg == 0) { + // Handle the SIB byte for the case where there is no base, see Intel + // Manual 2A, table 2-7. The displacement has already been output. + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = GetX86RegNum(IndexReg); + else // Examples: [ESP+1*<noreg>+4] or [scaled idx]+disp32 (MOD=0,BASE=5) + IndexRegNo = 4; + EmitSIBByte(SS, IndexRegNo, 5, CurByte, OS); + } else { + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = GetX86RegNum(IndexReg); + else + IndexRegNo = 4; // For example [ESP+1*<noreg>+4] + EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS); + } + + // Do we need to output a displacement? + if (ForceDisp8) + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); + else if (ForceDisp32 || Disp.getImm() != 0) + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); +} + +/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 +/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand +/// size, and 3) use of X86-64 extended registers. +static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, + const TargetInstrDesc &Desc) { + // Pseudo instructions never have a rex byte. 
+ if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return 0; + + unsigned REX = 0; + if (TSFlags & X86II::REX_W) + REX |= 1 << 3; + + if (MI.getNumOperands() == 0) return REX; + + unsigned NumOps = MI.getNumOperands(); + // FIXME: MCInst should explicitize the two-addrness. + bool isTwoAddr = NumOps > 1 && + Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + unsigned i = isTwoAddr ? 1 : 0; + for (; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue; + // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything + // that returns non-zero. + REX |= 0x40; + break; + } + + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRMSrcReg: + if (MI.getOperand(0).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + REX |= 1 << 2; + i = isTwoAddr ? 2 : 1; + for (; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << 0; + } + break; + case X86II::MRMSrcMem: { + if (MI.getOperand(0).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + REX |= 1 << 2; + unsigned Bit = 0; + i = isTwoAddr ? 2 : 1; + for (; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: { + unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); + i = isTwoAddr ? 
1 : 0; + if (NumOps > e && MI.getOperand(e).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg())) + REX |= 1 << 2; + unsigned Bit = 0; + for (; i != e; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + default: + if (MI.getOperand(0).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + REX |= 1 << 0; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + REX |= 1 << 2; + } + break; + } + return REX; +} + +void X86MCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = TII.get(Opcode); + unsigned TSFlags = Desc.TSFlags; + + // Keep track of the current byte being emitted. + unsigned CurByte = 0; + + // FIXME: We should emit the prefixes in exactly the same order as GAS does, + // in order to provide diffability. + + // Emit the lock opcode prefix as needed. + if (TSFlags & X86II::LOCK) + EmitByte(0xF0, CurByte, OS); + + // Emit segment override opcode prefix as needed. + switch (TSFlags & X86II::SegOvrMask) { + default: assert(0 && "Invalid segment!"); + case 0: break; // No segment override! + case X86II::FS: + EmitByte(0x64, CurByte, OS); + break; + case X86II::GS: + EmitByte(0x65, CurByte, OS); + break; + } + + // Emit the repeat opcode prefix as needed. + if ((TSFlags & X86II::Op0Mask) == X86II::REP) + EmitByte(0xF3, CurByte, OS); + + // Emit the operand size opcode prefix as needed. + if (TSFlags & X86II::OpSize) + EmitByte(0x66, CurByte, OS); + + // Emit the address size opcode prefix as needed. 
+ if (TSFlags & X86II::AdSize) + EmitByte(0x67, CurByte, OS); + + bool Need0FPrefix = false; + switch (TSFlags & X86II::Op0Mask) { + default: assert(0 && "Invalid prefix!"); + case 0: break; // No prefix! + case X86II::REP: break; // already handled. + case X86II::TB: // Two-byte opcode prefix + case X86II::T8: // 0F 38 + case X86II::TA: // 0F 3A + Need0FPrefix = true; + break; + case X86II::TF: // F2 0F 38 + EmitByte(0xF2, CurByte, OS); + Need0FPrefix = true; + break; + case X86II::XS: // F3 0F + EmitByte(0xF3, CurByte, OS); + Need0FPrefix = true; + break; + case X86II::XD: // F2 0F + EmitByte(0xF2, CurByte, OS); + Need0FPrefix = true; + break; + case X86II::D8: EmitByte(0xD8, CurByte, OS); break; + case X86II::D9: EmitByte(0xD9, CurByte, OS); break; + case X86II::DA: EmitByte(0xDA, CurByte, OS); break; + case X86II::DB: EmitByte(0xDB, CurByte, OS); break; + case X86II::DC: EmitByte(0xDC, CurByte, OS); break; + case X86II::DD: EmitByte(0xDD, CurByte, OS); break; + case X86II::DE: EmitByte(0xDE, CurByte, OS); break; + case X86II::DF: EmitByte(0xDF, CurByte, OS); break; + } + + // Handle REX prefix. + // FIXME: Can this come before F2 etc to simplify emission? + if (Is64BitMode) { + if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc)) + EmitByte(0x40 | REX, CurByte, OS); + } + + // 0x0F escape code must be emitted just before the opcode. + if (Need0FPrefix) + EmitByte(0x0F, CurByte, OS); + + // FIXME: Pull this up into previous switch if REX can be moved earlier. + switch (TSFlags & X86II::Op0Mask) { + case X86II::TF: // F2 0F 38 + case X86II::T8: // 0F 38 + EmitByte(0x38, CurByte, OS); + break; + case X86II::TA: // 0F 3A + EmitByte(0x3A, CurByte, OS); + break; + } + + // If this is a two-address instruction, skip one of the register operands. 
+ unsigned NumOps = Desc.getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) + ++CurOp; + else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 + --NumOps; + + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: + assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); + default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; + assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); + case X86II::Pseudo: return; // Pseudo instructions encode to nothing. + case X86II::RawFrm: + EmitByte(BaseOpcode, CurByte, OS); + break; + + case X86II::AddRegFrm: + EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); + break; + + case X86II::MRMDestReg: + EmitByte(BaseOpcode, CurByte, OS); + EmitRegModRMByte(MI.getOperand(CurOp), + GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); + CurOp += 2; + break; + + case X86II::MRMDestMem: + EmitByte(BaseOpcode, CurByte, OS); + EmitMemModRMByte(MI, CurOp, + GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)), + TSFlags, CurByte, OS, Fixups); + CurOp += X86AddrNumOperands + 1; + break; + + case X86II::MRMSrcReg: + EmitByte(BaseOpcode, CurByte, OS); + EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)), + CurByte, OS); + CurOp += 2; + break; + + case X86II::MRMSrcMem: { + EmitByte(BaseOpcode, CurByte, OS); + + // FIXME: Maybe lea should have its own form? This is a horrible hack. 
+ int AddrOperands; + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + Opcode == X86::LEA16r || Opcode == X86::LEA32r) + AddrOperands = X86AddrNumOperands - 1; // No segment register + else + AddrOperands = X86AddrNumOperands; + + EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)), + TSFlags, CurByte, OS, Fixups); + CurOp += AddrOperands + 1; + break; + } + + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + EmitByte(BaseOpcode, CurByte, OS); + EmitRegModRMByte(MI.getOperand(CurOp++), + (TSFlags & X86II::FormMask)-X86II::MRM0r, + CurByte, OS); + break; + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + EmitByte(BaseOpcode, CurByte, OS); + EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, + TSFlags, CurByte, OS, Fixups); + CurOp += X86AddrNumOperands; + break; + case X86II::MRM_C1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC1, CurByte, OS); + break; + case X86II::MRM_C2: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC2, CurByte, OS); + break; + case X86II::MRM_C3: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC3, CurByte, OS); + break; + case X86II::MRM_C4: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC4, CurByte, OS); + break; + case X86II::MRM_C8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC8, CurByte, OS); + break; + case X86II::MRM_C9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC9, CurByte, OS); + break; + case X86II::MRM_E8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xE8, CurByte, OS); + break; + case X86II::MRM_F0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF0, CurByte, OS); + break; + case X86II::MRM_F8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF8, CurByte, OS); + break; + case X86II::MRM_F9: + EmitByte(BaseOpcode, CurByte, OS); + 
EmitByte(0xF9, CurByte, OS); + break; + } + + // If there is a remaining operand, it must be a trailing immediate. Emit it + // according to the right size for the instruction. + if (CurOp != NumOps) + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + +#ifndef NDEBUG + // FIXME: Verify. + if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) { + errs() << "Cannot encode all operands of: "; + MI.dump(); + errs() << '\n'; + abort(); + } +#endif +} diff --git a/lib/Target/X86/X86MCTargetExpr.cpp b/lib/Target/X86/X86MCTargetExpr.cpp new file mode 100644 index 0000000..17b4fe8 --- /dev/null +++ b/lib/Target/X86/X86MCTargetExpr.cpp @@ -0,0 +1,48 @@ +//===- X86MCTargetExpr.cpp - X86 Target Specific MCExpr Implementation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86MCTargetExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +X86MCTargetExpr *X86MCTargetExpr::Create(const MCSymbol *Sym, VariantKind K, + MCContext &Ctx) { + return new (Ctx) X86MCTargetExpr(Sym, K); +} + +void X86MCTargetExpr::PrintImpl(raw_ostream &OS) const { + OS << *Sym; + + switch (Kind) { + case Invalid: OS << "@<invalid>"; break; + case GOT: OS << "@GOT"; break; + case GOTOFF: OS << "@GOTOFF"; break; + case GOTPCREL: OS << "@GOTPCREL"; break; + case GOTTPOFF: OS << "@GOTTPOFF"; break; + case INDNTPOFF: OS << "@INDNTPOFF"; break; + case NTPOFF: OS << "@NTPOFF"; break; + case PLT: OS << "@PLT"; break; + case TLSGD: OS << "@TLSGD"; break; + case TPOFF: OS << "@TPOFF"; break; + } +} + +bool X86MCTargetExpr::EvaluateAsRelocatableImpl(MCValue &Res) const { + // FIXME: I don't know if this is right, it followed 
MCSymbolRefExpr. + + // Evaluate recursively if this is a variable. + if (Sym->isVariable()) + return Sym->getValue()->EvaluateAsRelocatable(Res); + + Res = MCValue::get(Sym, 0, 0); + return true; +} diff --git a/lib/Target/X86/X86MCTargetExpr.h b/lib/Target/X86/X86MCTargetExpr.h new file mode 100644 index 0000000..7de8a5c --- /dev/null +++ b/lib/Target/X86/X86MCTargetExpr.h @@ -0,0 +1,49 @@ +//===- X86MCTargetExpr.h - X86 Target Specific MCExpr -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_MCTARGETEXPR_H +#define X86_MCTARGETEXPR_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +/// X86MCTargetExpr - This class represents symbol variants, like foo@GOT. +class X86MCTargetExpr : public MCTargetExpr { +public: + enum VariantKind { + Invalid, + GOT, + GOTOFF, + GOTPCREL, + GOTTPOFF, + INDNTPOFF, + NTPOFF, + PLT, + TLSGD, + TPOFF + }; +private: + /// Sym - The symbol being referenced. + const MCSymbol * const Sym; + /// Kind - The modifier. 
+ const VariantKind Kind; + + X86MCTargetExpr(const MCSymbol *S, VariantKind K) : Sym(S), Kind(K) {} +public: + static X86MCTargetExpr *Create(const MCSymbol *Sym, VariantKind K, + MCContext &Ctx); + + void PrintImpl(raw_ostream &OS) const; + bool EvaluateAsRelocatableImpl(MCValue &Res) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index fafcf7e..4b2529b 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -18,12 +18,6 @@ namespace llvm { -enum NameDecorationStyle { - None, - StdCall, - FastCall -}; - /// X86MachineFunctionInfo - This class is derived from MachineFunction and /// contains private X86 target-specific information for each MachineFunction. class X86MachineFunctionInfo : public MachineFunctionInfo { @@ -41,16 +35,11 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// Used on windows platform for stdcall & fastcall name decoration unsigned BytesToPopOnReturn; - /// DecorationStyle - If the function requires additional name decoration, - /// DecorationStyle holds the right way to do so. - NameDecorationStyle DecorationStyle; - /// ReturnAddrIndex - FrameIndex for return slot. int ReturnAddrIndex; - /// TailCallReturnAddrDelta - Delta the ReturnAddr stack slot is moved - /// Used for creating an area before the register spill area on the stack - /// the returnaddr can be savely move to this area + /// TailCallReturnAddrDelta - The number of bytes by which return address + /// stack slot is moved as the result of tail call optimization. 
int TailCallReturnAddrDelta; /// SRetReturnReg - Some subtargets require that sret lowering includes @@ -67,7 +56,6 @@ public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -77,7 +65,6 @@ public: : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -92,9 +79,6 @@ public: unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;} - NameDecorationStyle getDecorationStyle() const { return DecorationStyle; } - void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;} - int getRAIndex() const { return ReturnAddrIndex; } void setRAIndex(int Index) { ReturnAddrIndex = Index; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index f959a2d..8524236 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -473,9 +473,9 @@ bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, } int -X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { +X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea(); uint64_t StackSize = MFI->getStackSize(); @@ -485,7 +485,7 @@ X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { Offset += SlotSize; } else { unsigned Align = MFI->getObjectAlignment(FI); - assert( (-(Offset + StackSize)) % Align == 0); + assert((-(Offset + StackSize)) % Align == 0); Align = 0; return Offset + StackSize; } @@ -498,7 +498,7 @@ 
X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { Offset += SlotSize; // Skip the RETADDR move area - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta; @@ -627,10 +627,6 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - MFI->calculateMaxStackAlignment(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -1242,13 +1238,19 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, } // Jump to label or value in register. - if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) + if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - else if (RetOpcode== X86::TCRETURNri64) + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else if (RetOpcode == X86::TCRETURNri64) { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); - else + } else { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); + } + + MachineInstr *NewMI = prior(MBBI); + for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); // Delete the pseudo instruction TCRETURN. 
MBB.erase(MBBI); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index dec3fba..8fb5e92 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -156,7 +156,7 @@ public: // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; - int getFrameIndexOffset(MachineFunction &MF, int FI) const; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const; void getInitialFrameState(std::vector<MachineMove> &Moves) const; // Exception handling queries. diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 6db0cc3..1559bf7 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -512,20 +512,17 @@ def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD]; } -// GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of -// GR8, GR16, GR32, and GR64 which contain only the first 8 GPRs. -// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes -// of registers which do not by themselves require a REX prefix. +// GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, - [AL, CL, DL, AH, CH, DH, BL, BH, - SIL, DIL, BPL, SPL]> { + [AL, CL, DL, AH, CH, DH, BL, BH]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ + // In 64-bit mode, it's not safe to blindly allocate H registers. 
static const unsigned X86_GR8_NOREX_AO_64[] = { - X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL, X86::BPL + X86::AL, X86::CL, X86::DL, X86::BL }; GR8_NOREXClass::iterator @@ -541,21 +538,15 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8, GR8_NOREXClass::iterator GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); - // Does the function dedicate RBP / EBP to being a frame ptr? - if (!Subtarget.is64Bit()) - // In 32-mode, none of the 8-bit registers aliases EBP or ESP. - return begin() + 8; - else if (RI->hasFP(MF)) - // If so, don't allocate SPL or BPL. - return array_endof(X86_GR8_NOREX_AO_64) - 1; - else - // If not, just don't allocate SPL. + if (Subtarget.is64Bit()) return array_endof(X86_GR8_NOREX_AO_64); + else + return end(); } }]; } +// GR16_NOREX - GR16 registers which do not require a REX prefix. def GR16_NOREX : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> { let SubRegClassList = [GR8_NOREX, GR8_NOREX]; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 2039be7..adef5bc 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -53,9 +53,9 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->hasDLLImportLinkage()) return X86II::MO_DLLIMPORT; - // GV with ghost linkage (in JIT lazy compilation mode) do not require an + // Materializable GVs (in JIT lazy compilation mode) do not require an // extra load from stub. - bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); // X86-64 in PIC mode. 
if (isPICStyleRIPRel()) { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 618dd10..5e05c2f 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -175,7 +175,7 @@ public: else if (isTargetDarwin()) p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; else if (isTargetMingw() || isTargetWindows()) - p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32"; + p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32"; else p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 731c3ab..7802f98 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -30,9 +30,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { case Triple::MinGW32: case Triple::MinGW64: case Triple::Cygwin: - return new X86MCAsmInfoCOFF(TheTriple); case Triple::Win32: - return new X86WinMCAsmInfo(TheTriple); + return new X86MCAsmInfoCOFF(TheTriple); default: return new X86ELFMCAsmInfo(TheTriple); } @@ -48,8 +47,10 @@ extern "C" void LLVMInitializeX86Target() { RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); // Register the code emitter. - TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter); - TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter); + TargetRegistry::RegisterCodeEmitter(TheX86_32Target, + createX86_32MCCodeEmitter); + TargetRegistry::RegisterCodeEmitter(TheX86_64Target, + createX86_64MCCodeEmitter); } @@ -145,10 +146,6 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, // Install an instruction selector. PM.add(createX86ISelDag(*this, OptLevel)); - // If we're using Fast-ISel, clean up the mess. - if (EnableFastISel) - PM.add(createDeadMachineInstructionElimPass()); - // Install a pass to insert x87 FP_REG_KILL instructions, as needed. 
PM.add(createX87FPRegKillInserterPass()); @@ -168,22 +165,6 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - // FIXME: Move this to TargetJITInfo! - // On Darwin, do not override 64-bit setting made in X86TargetMachine(). - if (DefRelocModel == Reloc::Default && - (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) { - setRelocationModel(Reloc::Static); - Subtarget.setPICStyle(PICStyles::None); - } - - PM.add(createX86CodeEmitterPass(*this, MCE)); - - return false; -} - -bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // FIXME: Move this to TargetJITInfo! // On Darwin, do not override 64-bit setting made in X86TargetMachine(). @@ -199,34 +180,6 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } -bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - PM.add(createX86ObjectCodeEmitterPass(*this, OCE)); - return false; -} - -bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE) { - PM.add(createX86CodeEmitterPass(*this, MCE)); - return false; -} - -bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { - PM.add(createX86JITCodeEmitterPass(*this, JCE)); - return false; -} - -bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE) { - PM.add(createX86ObjectCodeEmitterPass(*this, OCE)); - return false; -} - void X86TargetMachine::setCodeModelForStatic() { if (getCodeModel() != CodeModel::Default) return; @@ -246,32 +199,3 @@ void X86TargetMachine::setCodeModelForJIT() { else setCodeModel(CodeModel::Small); } - -/// getLSDAEncoding - Returns the LSDA pointer encoding. 
The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding X86TargetMachine::getLSDAEncoding() const { - if (Subtarget.isTargetDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index d05bebd..2bb5454 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -62,37 +62,12 @@ public: return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; } - /// getLSDAEncoding - Returns the LSDA pointer encoding. 
The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; - // Set up the pass pipeline. virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - MachineCodeEmitter &MCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE); - virtual bool addSimpleCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - ObjectCodeEmitter &OCE); }; /// X86_32TargetMachine - X86 32-bit target machine. 
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 41ad153..d1ee3fc 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -7,60 +7,176 @@ // //===----------------------------------------------------------------------===// +#include "X86MCTargetExpr.h" #include "X86TargetObjectFile.h" +#include "X86TargetMachine.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/Target/Mangler.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Dwarf.h" using namespace llvm; +using namespace dwarf; const MCExpr *X8632_MachoTargetObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { + MachineModuleInfo *MMI, unsigned Encoding) const { // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - - MachineModuleInfoMachO &MachOMMI = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - - // FIXME: Use GetSymbolWithGlobalValueBase. - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - - // Add information about the stub reference to MachOMMI so that the stub gets - // emitted by the asmprinter. - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - const MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); - if (StubSym == 0) { - Name.clear(); - Mang->getNameWithPrefix(Name, GV, false); - StubSym = getContext().GetOrCreateSymbol(Name.str()); + + if (Encoding & DW_EH_PE_indirect) { + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. 
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = getContext().GetOrCreateSymbol(Name.str()); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); } - - return MCSymbolRefExpr::Create(Sym, getContext()); + + return TargetLoweringObjectFileMachO:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); } const MCExpr *X8664_MachoTargetObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - + MachineModuleInfo *MMI, unsigned Encoding) const { + // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which // is an indirect pc-relative reference. - IsIndirect = true; - IsPCRel = true; - - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, false); - Name += "@GOTPCREL"; - const MCExpr *Res = - MCSymbolRefExpr::Create(Name.str(), getContext()); - const MCExpr *Four = MCConstantExpr::Create(4, getContext()); - return MCBinaryExpr::CreateAdd(Res, Four, getContext()); + if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, false); + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + const MCExpr *Res = + X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::Create(4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Four, getContext()); + } + + return TargetLoweringObjectFileMachO:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); } +unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned 
X8632_ELFTargetObjectFile::getLSDAEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? 
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8632_MachoTargetObjectFile::getPersonalityEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8632_MachoTargetObjectFile::getLSDAEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8632_MachoTargetObjectFile::getFDEEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8632_MachoTargetObjectFile::getTTypeEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getPersonalityEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getLSDAEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getFDEEncoding() const { + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned X8664_MachoTargetObjectFile::getTTypeEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 377a93b..0fff194 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -10,21 +10,27 @@ #ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H #define LLVM_TARGET_X86_TARGETOBJECTFILE_H +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" #include 
"llvm/Target/TargetLoweringObjectFile.h" namespace llvm { - + class X86TargetMachine; + /// X8632_MachoTargetObjectFile - This TLOF implementation is used for /// Darwin/x86-32. class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { public: - + virtual const MCExpr * getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; + MachineModuleInfo *MMI, unsigned Encoding) const; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; }; - + /// X8664_MachoTargetObjectFile - This TLOF implementation is used for /// Darwin/x86-64. class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { @@ -32,9 +38,35 @@ namespace llvm { virtual const MCExpr * getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; + MachineModuleInfo *MMI, unsigned Encoding) const; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; + }; + + class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8632_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { }; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; }; + + class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8664_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { }; + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned 
getTTypeEncoding() const; + }; + } // end namespace llvm #endif |