diff options
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/AsmPrinter/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp | 491 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h | 23 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 6 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 7 | ||||
-rw-r--r-- | lib/Target/X86/X86.td | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86CodeEmitter.cpp | 8 | ||||
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 37 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 188 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 232 | ||||
-rw-r--r-- | lib/Target/X86/X86Instr64bit.td | 23 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrBuilder.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 46 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.h | 81 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrMMX.td | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.td | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 9 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 18 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.cpp | 17 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.h | 6 |
21 files changed, 700 insertions, 527 deletions
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt index 2079a9f..a28c826 100644 --- a/lib/Target/X86/AsmPrinter/CMakeLists.txt +++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt @@ -6,3 +6,4 @@ add_llvm_library(LLVMX86AsmPrinter X86AsmPrinter.cpp X86IntelAsmPrinter.cpp ) +add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
\ No newline at end of file diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp index 60ed4f0..e75cfc5 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp @@ -23,10 +23,13 @@ #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" +#include "llvm/MDNode.h" #include "llvm/Type.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Support/CommandLine.h" @@ -41,18 +44,26 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed"); static cl::opt<bool> NewAsmPrinter("experimental-asm-printer", cl::Hidden); -static std::string getPICLabelString(unsigned FnNum, - const TargetAsmInfo *TAI, - const X86Subtarget* Subtarget) { - std::string label; +//===----------------------------------------------------------------------===// +// Primitive Helper Functions. +//===----------------------------------------------------------------------===// + +void X86ATTAsmPrinter::PrintPICBaseSymbol() const { if (Subtarget->isTargetDarwin()) - label = "\"L" + utostr_32(FnNum) + "$pb\""; + O << "\"L" << getFunctionNumber() << "$pb\""; else if (Subtarget->isTargetELF()) - label = ".Lllvm$" + utostr_32(FnNum) + "." "$piclabel"; + O << ".Lllvm$" << getFunctionNumber() << "." "$piclabel"; else assert(0 && "Don't know how to print PIC label!\n"); +} - return label; +/// PrintUnmangledNameSafely - Print out the printable characters in the name. +/// Don't print things like \\n or \\0. 
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { + for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); + Name != E; ++Name) + if (isprint(*Name)) + OS << *Name; } static X86MachineFunctionInfo calculateFunctionInfo(const Function *F, @@ -89,15 +100,6 @@ static X86MachineFunctionInfo calculateFunctionInfo(const Function *F, return Info; } -/// PrintUnmangledNameSafely - Print out the printable characters in the name. -/// Don't print things like \\n or \\0. -static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { - for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); - Name != E; ++Name) - if (isprint(*Name)) - OS << *Name; -} - /// decorateName - Query FunctionInfoMap and use this information for various /// name decoration. void X86ATTAsmPrinter::decorateName(std::string &Name, @@ -152,6 +154,8 @@ void X86ATTAsmPrinter::decorateName(std::string &Name, } } + + void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { const Function *F = MF.getFunction(); @@ -159,9 +163,12 @@ void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { SwitchToSection(TAI->SectionForGlobal(F)); + // FIXME: A function's alignment should be part of MachineFunction. There + // shouldn't be a policy decision here. unsigned FnAlign = 4; if (F->hasFnAttr(Attribute::OptimizeForSize)) FnAlign = 1; + switch (F->getLinkage()) { default: assert(0 && "Unknown linkage type!"); case Function::InternalLinkage: // Symbols default to internal. 
@@ -283,13 +290,8 @@ bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) { return false; } -static inline bool shouldPrintGOT(TargetMachine &TM, const X86Subtarget* ST) { - return ST->isPICStyleGOT() && TM.getRelocationModel() == Reloc::PIC_; -} - static inline bool shouldPrintPLT(TargetMachine &TM, const X86Subtarget* ST) { - return ST->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_ && - (ST->isPICStyleRIPRel() || ST->isPICStyleGOT()); + return ST->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_; } static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) { @@ -324,6 +326,8 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { } if (shouldPrintStub(TM, Subtarget)) { + // DARWIN/X86-32 in != static mode. + // Link-once, declaration, or Weakly-linked global variables need // non-lazily-resolved stubs if (GV->isDeclaration() || GV->isWeakForLinker()) { @@ -354,9 +358,8 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { O << Name; } } else { - if (GV->hasDLLImportLinkage()) { + if (GV->hasDLLImportLinkage()) O << "__imp_"; - } O << Name; if (shouldPrintPLT(TM, Subtarget)) { @@ -370,9 +373,6 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { FnStubs.insert(Name); } - if (GV->hasExternalWeakLinkage()) - ExtWeakSymbols.insert(GV); - printOffset(MO.getOffset()); if (needCloseParen) @@ -386,7 +386,9 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { Name += MO.getSymbolName(); // Print function stub suffix unless it's Mac OS X 10.5 and up. if (shouldPrintStub(TM, Subtarget) && + // DARWIN/X86-32 in != static mode. 
!(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) { + FnStubs.insert(Name); printSuffixedName(Name, "$stub"); return; @@ -401,23 +403,15 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { O << Name; - if (shouldPrintPLT(TM, Subtarget)) { - std::string GOTName(TAI->getGlobalPrefix()); - GOTName+="_GLOBAL_OFFSET_TABLE_"; - if (Name == GOTName) - // HACK! Emit extra offset to PC during printing GOT offset to - // compensate for the size of popl instruction. The resulting code - // should look like: - // call .piclabel - // piclabel: - // popl %some_register - // addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register - O << " + [.-" - << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']'; - - O << "@PLT"; + if (MO.getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) { + O << " + [.-"; + PrintPICBaseSymbol(); + O << ']'; } + if (shouldPrintPLT(TM, Subtarget)) + O << "@PLT"; + if (needCloseParen) O << ')'; @@ -427,9 +421,10 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { } void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, - const char *Modifier, bool NotRIPRel) { + const char *Modifier) { const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { + default: assert(0 && "unknown operand type!"); case MachineOperand::MO_Register: { assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && "Virtual registers should not make it this far!"); @@ -456,18 +451,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, if (!isMemOp) O << '$'; O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << MO.getIndex(); - - if (TM.getRelocationModel() == Reloc::PIC_) { - if (Subtarget->isPICStyleStub()) - O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber() - << "$pb\""; - else if (Subtarget->isPICStyleGOT()) - O << "@GOTOFF"; - } - - if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel) 
- O << "(%rip)"; - return; + break; } case MachineOperand::MO_ConstantPoolIndex: { bool isMemOp = Modifier && !strcmp(Modifier, "mem"); @@ -475,38 +459,17 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); - if (TM.getRelocationModel() == Reloc::PIC_) { - if (Subtarget->isPICStyleStub()) - O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber() - << "$pb\""; - else if (Subtarget->isPICStyleGOT()) - O << "@GOTOFF"; - } - printOffset(MO.getOffset()); - - if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel) - O << "(%rip)"; - return; + break; } case MachineOperand::MO_GlobalAddress: { bool isMemOp = Modifier && !strcmp(Modifier, "mem"); - bool needCloseParen = false; const GlobalValue *GV = MO.getGlobal(); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - if (!GVar) { - // If GV is an alias then use the aliasee for determining - // thread-localness. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar =dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)); - } - - bool isThreadLocal = GVar && GVar->isThreadLocal(); - std::string Name = Mang->getValueName(GV); decorateName(Name, GV); + bool needCloseParen = false; if (!isMemOp) O << '$'; else if (Name[0] == '$') { @@ -517,6 +480,8 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } if (shouldPrintStub(TM, Subtarget)) { + // DARWIN/X86-32 in != static mode. 
+ // Link-once, declaration, or Weakly-linked global variables need // non-lazily-resolved stubs if (GV->isDeclaration() || GV->isWeakForLinker()) { @@ -539,118 +504,59 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, O << Name; } - if (TM.getRelocationModel() == Reloc::PIC_) - O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget); + if (TM.getRelocationModel() == Reloc::PIC_) { + O << '-'; + PrintPICBaseSymbol(); + } } else { if (GV->hasDLLImportLinkage()) O << "__imp_"; O << Name; } - if (GV->hasExternalWeakLinkage()) - ExtWeakSymbols.insert(GV); - printOffset(MO.getOffset()); if (needCloseParen) O << ')'; - bool isRIPRelative = false; - if (isThreadLocal) { - TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel()); - switch (model) { - case TLSModel::GeneralDynamic: - O << "@TLSGD"; - break; - case TLSModel::LocalDynamic: - // O << "@TLSLD"; // local dynamic not implemented - O << "@TLSGD"; - break; - case TLSModel::InitialExec: - if (Subtarget->is64Bit()) { - assert (!NotRIPRel); - O << "@GOTTPOFF"; - isRIPRelative = true; - } else { - O << "@INDNTPOFF"; - } - break; - case TLSModel::LocalExec: - if (Subtarget->is64Bit()) - O << "@TPOFF"; - else - O << "@NTPOFF"; - break; - default: - assert (0 && "Unknown TLS model"); - } - } else if (isMemOp) { - if (shouldPrintGOT(TM, Subtarget)) { - if (Subtarget->GVRequiresExtraLoad(GV, TM, false)) - O << "@GOT"; - else - O << "@GOTOFF"; - } else if (Subtarget->isPICStyleRIPRel() && - !NotRIPRel) { - if (TM.getRelocationModel() != Reloc::Static) { - if (Subtarget->GVRequiresExtraLoad(GV, TM, false)) - O << "@GOTPCREL"; - } - - isRIPRelative = true; - } - } - - // Use rip when possible to reduce code size, except when - // index or base register are also part of the address. e.g. - // foo(%rip)(%rcx,%rax,4) is not legal. 
- if (isRIPRelative) - O << "(%rip)"; - - return; + break; } - case MachineOperand::MO_ExternalSymbol: { - bool isMemOp = Modifier && !strcmp(Modifier, "mem"); - bool needCloseParen = false; - std::string Name(TAI->getGlobalPrefix()); - Name += MO.getSymbolName(); - - // Print function stub suffix unless it's Mac OS X 10.5 and up. - if (!isMemOp) - O << '$'; - else if (Name[0] == '$') { - // The name begins with a dollar-sign. In order to avoid having it look - // like an integer immediate to the assembler, enclose it in parens. - O << '('; - needCloseParen = true; - } - - O << Name; - - if (shouldPrintPLT(TM, Subtarget)) { - std::string GOTName(TAI->getGlobalPrefix()); - GOTName+="_GLOBAL_OFFSET_TABLE_"; - if (Name == GOTName) - // HACK! Emit extra offset to PC during printing GOT offset to - // compensate for the size of popl instruction. The resulting code - // should look like: - // call .piclabel - // piclabel: - // popl %some_register - // addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register - O << " + [.-" - << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']'; - } - - if (needCloseParen) - O << ')'; - - if (Subtarget->isPICStyleRIPRel()) - O << "(%rip)"; - return; + case MachineOperand::MO_ExternalSymbol: + /// NOTE: MO_ExternalSymbol in a non-pcrel_imm context is *only* generated + /// by _GLOBAL_OFFSET_TABLE_ on X86-32. All others are call operands, which + /// are pcrel_imm's. + assert(!Subtarget->is64Bit() && !Subtarget->isPICStyleRIPRel()); + // These are never used as memory operands. 
+ assert(!(Modifier && !strcmp(Modifier, "mem"))); + + O << '$'; + O << TAI->getGlobalPrefix(); + O << MO.getSymbolName(); + break; } + + switch (MO.getTargetFlags()) { default: - O << "<unknown operand type>"; return; + assert(0 && "Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: + break; + case X86II::MO_GOT_ABSOLUTE_ADDRESS: + O << " + [.-"; + PrintPICBaseSymbol(); + O << ']'; + break; + case X86II::MO_PIC_BASE_OFFSET: + O << '-'; + PrintPICBaseSymbol(); + break; + case X86II::MO_TLSGD: O << "@TLSGD"; break; + case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break; + case X86II::MO_TPOFF: O << "@TPOFF"; break; + case X86II::MO_NTPOFF: O << "@NTPOFF"; break; + case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break; + case X86II::MO_GOT: O << "@GOT"; break; + case X86II::MO_GOTOFF: O << "@GOTOFF"; break; } } @@ -670,25 +576,24 @@ void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) { } void X86ATTAsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, - const char *Modifier, - bool NotRIPRel) { + const char *Modifier) { MachineOperand BaseReg = MI->getOperand(Op); MachineOperand IndexReg = MI->getOperand(Op+2); const MachineOperand &DispSpec = MI->getOperand(Op+3); - NotRIPRel |= IndexReg.getReg() || BaseReg.getReg(); if (DispSpec.isGlobal() || DispSpec.isCPI() || DispSpec.isJTI() || DispSpec.isSymbol()) { - printOperand(MI, Op+3, "mem", NotRIPRel); + printOperand(MI, Op+3, "mem"); } else { int DispVal = DispSpec.getImm(); if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) O << DispVal; } - if (IndexReg.getReg() || BaseReg.getReg()) { + if ((IndexReg.getReg() || BaseReg.getReg()) && + (Modifier == 0 || strcmp(Modifier, "no-rip"))) { unsigned ScaleVal = MI->getOperand(Op+1).getImm(); unsigned BaseRegOperand = 0, IndexRegOperand = 2; @@ -716,14 +621,14 @@ void X86ATTAsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, } void 
X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, - const char *Modifier, bool NotRIPRel){ + const char *Modifier) { assert(isMem(MI, Op) && "Invalid memory reference!"); MachineOperand Segment = MI->getOperand(Op+4); if (Segment.getReg()) { printOperand(MI, Op+4, Modifier); O << ':'; } - printLeaMemReference(MI, Op, Modifier, NotRIPRel); + printLeaMemReference(MI, Op, Modifier); } void X86ATTAsmPrinter::printPICJumpTableSetLabel(unsigned uid, @@ -741,13 +646,19 @@ void X86ATTAsmPrinter::printPICJumpTableSetLabel(unsigned uid, if (Subtarget->isPICStyleRIPRel()) O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '\n'; - else - O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << '\n'; + else { + O << '-'; + PrintPICBaseSymbol(); + O << '\n'; + } } + void X86ATTAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { - std::string label = getPICLabelString(getFunctionNumber(), TAI, Subtarget); - O << label << '\n' << label << ':'; + PrintPICBaseSymbol(); + O << '\n'; + PrintPICBaseSymbol(); + O << ':'; } @@ -810,7 +721,7 @@ bool X86ATTAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, switch (ExtraCode[0]) { default: return true; // Unknown modifier. case 'c': // Don't print "$" before a global var name or constant. - printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true); + printOperand(MI, OpNo, "mem"); return false; case 'b': // Print QImode register case 'h': // Print QImode high register @@ -823,8 +734,19 @@ bool X86ATTAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; case 'P': // Don't print @PLT, but do print as memory. - printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true); + printOperand(MI, OpNo, "mem"); return false; + + case 'n': { // Negate the immediate or print a '-' before the operand. + // Note: this is a temporary solution. It should be handled target + // independently as part of the 'MC' work. 
+ const MachineOperand &MO = MI->getOperand(OpNo); + if (MO.isImm()) { + O << -MO.getImm(); + return false; + } + O << '-'; + } } } @@ -849,7 +771,7 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, // These only apply to registers, ignore on mem. break; case 'P': // Don't print @PLT, but do print as memory. - printMemReference(MI, OpNo, "mem", /*NotRIPRel=*/true); + printMemReference(MI, OpNo, "no-rip"); return false; } } @@ -931,8 +853,13 @@ void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) { /// doInitialization bool X86ATTAsmPrinter::doInitialization(Module &M) { - if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + if (NewAsmPrinter) { + Context = new MCContext(); + // FIXME: Send this to "O" instead of outs(). For now, we force it to + // stdout to make it easy to compare. + Streamer = createAsmStreamer(*Context, outs()); + } + return AsmPrinter::doInitialization(M); } @@ -956,6 +883,8 @@ void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { std::string name = Mang->getValueName(GVar); Constant *C = GVar->getInitializer(); + if (isa<MDNode>(C) || isa<MDString>(C)) + return; const Type *Type = C->getType(); unsigned Size = TD->getTypeAllocSize(Type); unsigned Align = TD->getPreferredAlignmentLog(GVar); @@ -1068,25 +997,6 @@ void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { EmitGlobalConstant(C); } -/// printGVStub - Print stub for a global value. -/// -void X86ATTAsmPrinter::printGVStub(const char *GV, const char *Prefix) { - printSuffixedName(GV, "$non_lazy_ptr", Prefix); - O << ":\n\t.indirect_symbol "; - if (Prefix) O << Prefix; - O << GV << "\n\t.long\t0\n"; -} - -/// printHiddenGVStub - Print stub for a hidden global value. 
-/// -void X86ATTAsmPrinter::printHiddenGVStub(const char *GV, const char *Prefix) { - EmitAlignment(2); - printSuffixedName(GV, "$non_lazy_ptr", Prefix); - if (Prefix) O << Prefix; - O << ":\n" << TAI->getData32bitsDirective() << GV << '\n'; -} - - bool X86ATTAsmPrinter::doFinalization(Module &M) { // Print out module-level global variables here. for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); @@ -1095,100 +1005,62 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) { if (I->hasDLLExportLinkage()) DLLExportedGVs.insert(Mang->makeNameProper(I->getName(),"")); - - // If the global is a extern weak symbol, remember to emit the weak - // reference! - // FIXME: This is rather hacky, since we'll emit references to ALL weak - // stuff, not used. But currently it's the only way to deal with extern weak - // initializers hidden deep inside constant expressions. - if (I->hasExternalWeakLinkage()) - ExtWeakSymbols.insert(I); - } - - for (Module::const_iterator I = M.begin(), E = M.end(); - I != E; ++I) { - // If the global is a extern weak symbol, remember to emit the weak - // reference! - // FIXME: This is rather hacky, since we'll emit references to ALL weak - // stuff, not used. But currently it's the only way to deal with extern weak - // initializers hidden deep inside constant expressions. 
- if (I->hasExternalWeakLinkage()) - ExtWeakSymbols.insert(I); } - // Output linker support code for dllexported globals - if (!DLLExportedGVs.empty()) - SwitchToDataSection(".section .drectve"); - - for (StringSet<>::iterator i = DLLExportedGVs.begin(), - e = DLLExportedGVs.end(); - i != e; ++i) - O << "\t.ascii \" -export:" << i->getKeyData() << ",data\"\n"; - - if (!DLLExportedFns.empty()) { - SwitchToDataSection(".section .drectve"); - } - - for (StringSet<>::iterator i = DLLExportedFns.begin(), - e = DLLExportedFns.end(); - i != e; ++i) - O << "\t.ascii \" -export:" << i->getKeyData() << "\"\n"; - if (Subtarget->isTargetDarwin()) { SwitchToDataSection(""); - - // Output stubs for dynamically-linked functions - for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end(); - i != e; ++i) { - SwitchToDataSection("\t.section __IMPORT,__jump_table,symbol_stubs," - "self_modifying_code+pure_instructions,5", 0); - const char *p = i->getKeyData(); - printSuffixedName(p, "$stub"); - O << ":\n" - "\t.indirect_symbol " << p << "\n" - "\thlt ; hlt ; hlt ; hlt ; hlt\n"; - } - - O << '\n'; - - // Print global value stubs. - bool InStubSection = false; + + // Add the (possibly multiple) personalities to the set of global value + // stubs. Only referenced functions get into the Personalities list. if (TAI->doesSupportExceptionHandling() && MMI && !Subtarget->is64Bit()) { - // Add the (possibly multiple) personalities to the set of global values. - // Only referenced functions get into the Personalities list. 
- const std::vector<Function *>& Personalities = MMI->getPersonalities(); - for (std::vector<Function *>::const_iterator I = Personalities.begin(), - E = Personalities.end(); I != E; ++I) { - if (!*I) + const std::vector<Function*> &Personalities = MMI->getPersonalities(); + for (unsigned i = 0, e = Personalities.size(); i != e; ++i) { + if (Personalities[i] == 0) continue; - if (!InStubSection) { - SwitchToDataSection( - "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers"); - InStubSection = true; - } - printGVStub((*I)->getNameStart(), "_"); + std::string Name = Mang->getValueName(Personalities[i]); + decorateName(Name, Personalities[i]); + GVStubs.insert(Name); } } + // Output stubs for dynamically-linked functions + if (!FnStubs.empty()) { + for (StringSet<>::iterator I = FnStubs.begin(), E = FnStubs.end(); + I != E; ++I) { + SwitchToDataSection("\t.section __IMPORT,__jump_table,symbol_stubs," + "self_modifying_code+pure_instructions,5", 0); + const char *Name = I->getKeyData(); + printSuffixedName(Name, "$stub"); + O << ":\n" + "\t.indirect_symbol " << Name << "\n" + "\thlt ; hlt ; hlt ; hlt ; hlt\n"; + } + O << '\n'; + } + // Output stubs for external and common global variables. 
- if (!InStubSection && !GVStubs.empty()) + if (!GVStubs.empty()) { SwitchToDataSection( "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers"); - for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end(); - i != e; ++i) - printGVStub(i->getKeyData()); + for (StringSet<>::iterator I = GVStubs.begin(), E = GVStubs.end(); + I != E; ++I) { + const char *Name = I->getKeyData(); + printSuffixedName(Name, "$non_lazy_ptr"); + O << ":\n\t.indirect_symbol " << Name << "\n\t.long\t0\n"; + } + } if (!HiddenGVStubs.empty()) { SwitchToSection(TAI->getDataSection()); - for (StringSet<>::iterator i = HiddenGVStubs.begin(), e = HiddenGVStubs.end(); - i != e; ++i) - printHiddenGVStub(i->getKeyData()); + EmitAlignment(2); + for (StringSet<>::iterator I = HiddenGVStubs.begin(), + E = HiddenGVStubs.end(); I != E; ++I) { + const char *Name = I->getKeyData(); + printSuffixedName(Name, "$non_lazy_ptr"); + O << ":\n" << TAI->getData32bitsDirective() << Name << '\n'; + } } - // Emit final debug information. - if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) - DW->EndModule(); - // Funny Darwin hack: This flag tells the linker that no global symbols // contain code that falls through to other global symbols (e.g. the obvious // implementation of multiple entry points). If this doesn't occur, the @@ -1204,17 +1076,40 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) { << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) << ";\t.endef\n"; } - - // Emit final debug information. - if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) - DW->EndModule(); - } else if (Subtarget->isTargetELF()) { - // Emit final debug information. - if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) - DW->EndModule(); } - - return AsmPrinter::doFinalization(M); + + + // Output linker support code for dllexported globals on windows. 
+ if (!DLLExportedGVs.empty()) { + SwitchToDataSection(".section .drectve"); + + for (StringSet<>::iterator i = DLLExportedGVs.begin(), + e = DLLExportedGVs.end(); i != e; ++i) + O << "\t.ascii \" -export:" << i->getKeyData() << ",data\"\n"; + } + + if (!DLLExportedFns.empty()) { + SwitchToDataSection(".section .drectve"); + + for (StringSet<>::iterator i = DLLExportedFns.begin(), + e = DLLExportedFns.end(); + i != e; ++i) + O << "\t.ascii \" -export:" << i->getKeyData() << "\"\n"; + } + + // Do common shutdown. + bool Changed = AsmPrinter::doFinalization(M); + + if (NewAsmPrinter) { + Streamer->Finish(); + + delete Streamer; + delete Context; + Streamer = 0; + Context = 0; + } + + return Changed; } // Include the auto-generated portion of the assembly writer. diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h index 68a6bc8..bd96115 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h @@ -27,17 +27,23 @@ namespace llvm { class MachineJumpTableInfo; +class MCContext; class MCInst; +class MCStreamer; class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { - MachineModuleInfo *MMI; const X86Subtarget *Subtarget; + + MCContext *Context; + MCStreamer *Streamer; public: explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM, const TargetAsmInfo *T, CodeGenOpt::Level OL, bool V) - : AsmPrinter(O, TM, T, OL, V), MMI(0) { + : AsmPrinter(O, TM, T, OL, V) { Subtarget = &TM.getSubtarget<X86Subtarget>(); + Context = 0; + Streamer = 0; } virtual const char *getPassName() const { @@ -69,7 +75,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { bool printInstruction(const MCInst *MI); void printOperand(const MCInst *MI, unsigned OpNo, - const char *Modifier = 0, bool NotRIPRel = false); + const char *Modifier = 0); void printMemReference(const MCInst *MI, unsigned Op); void printLeaMemReference(const MCInst *MI, unsigned Op); void 
printSSECC(const MCInst *MI, unsigned Op); @@ -117,7 +123,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { // These methods are used by the tablegen'erated instruction printer. void printOperand(const MachineInstr *MI, unsigned OpNo, - const char *Modifier = 0, bool NotRIPRel = false); + const char *Modifier = 0); void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo); void printi8mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); @@ -165,9 +171,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { void printMachineInstruction(const MachineInstr *MI); void printSSECC(const MachineInstr *MI, unsigned Op); void printMemReference(const MachineInstr *MI, unsigned Op, - const char *Modifier=NULL, bool NotRIPRel = false); + const char *Modifier=NULL); void printLeaMemReference(const MachineInstr *MI, unsigned Op, - const char *Modifier=NULL, bool NotRIPRel = false); + const char *Modifier=NULL); void printPICJumpTableSetLabel(unsigned uid, const MachineBasicBlock *MBB) const; void printPICJumpTableSetLabel(unsigned uid, unsigned uid2, @@ -181,9 +187,8 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { void printPICLabel(const MachineInstr *MI, unsigned Op); void printModuleLevelGV(const GlobalVariable* GVar); - void printGVStub(const char *GV, const char *Prefix = NULL); - void printHiddenGVStub(const char *GV, const char *Prefix = NULL); - + void PrintPICBaseSymbol() const; + bool runOnMachineFunction(MachineFunction &F); void emitFunctionHeader(const MachineFunction &MF); diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 9d50edc..fa0ee75 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -65,7 +65,7 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo, - const char 
*Modifier, bool NotRIPRel) { + const char *Modifier) { assert(Modifier == 0 && "Modifiers should not be used"); const MCOperand &Op = MI->getOperand(OpNo); @@ -93,13 +93,11 @@ void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo, } void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { - bool NotRIPRel = false; const MCOperand &BaseReg = MI->getOperand(Op); const MCOperand &IndexReg = MI->getOperand(Op+2); const MCOperand &DispSpec = MI->getOperand(Op+3); - NotRIPRel |= IndexReg.getReg() || BaseReg.getReg(); if (DispSpec.isImm()) { int64_t DispVal = DispSpec.getImm(); if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) @@ -108,7 +106,7 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { abort(); //assert(DispSpec.isGlobal() || DispSpec.isCPI() || // DispSpec.isJTI() || DispSpec.isSymbol()); - //printOperand(MI, Op+3, "mem", NotRIPRel); + //printOperand(MI, Op+3, "mem"); } if (IndexReg.getReg() || BaseReg.getReg()) { diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index a39203b..d1623d6 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -47,8 +47,5 @@ namespace { extern "C" int X86AsmPrinterForceLink; int X86AsmPrinterForceLink = 0; -// Force static initialization when called from -// llvm/InitializeAllAsmPrinters.h -namespace llvm { - void InitializeX86AsmPrinter() { } -} +// Force static initialization. 
+extern "C" void LLVMInitializeX86AsmPrinter() { } diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 8df138d..4d26364 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -55,6 +55,13 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions">; +def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", + "Enable AVX instructions">; +def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", + "Enable three-operand fused multiple-add">; +def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", + "Enable four-operand fused multiple-add">; + //===----------------------------------------------------------------------===// // X86 processors supported. //===----------------------------------------------------------------------===// @@ -82,6 +89,9 @@ def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>; +// Sandy Bridge does not have FMA +def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index e988a5c..d5846a0 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -301,7 +301,7 @@ bool Emitter<CodeEmitter>::gvNeedsNonLazyPtr(const GlobalValue *GV) { template<class CodeEmitter> void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp, - int DispVal, intptr_t PCAdj) { + int DispVal, intptr_t PCAdj) { // If this is a simple integer displacement that doesn't require a relocation, // emit it now. 
if (!RelocOp) { @@ -371,8 +371,10 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // Is a SIB byte needed? if ((!Is64BitMode || DispForReloc || BaseReg != 0) && IndexReg.getReg() == 0 && - (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) { - if (BaseReg == 0) { // Just a displacement? + (BaseReg == 0 || BaseReg == X86::RIP || + getX86RegNum(BaseReg) != N86::ESP)) { + if (BaseReg == 0 || + BaseReg == X86::RIP) { // Just a displacement? // Emit special case [disp32] encoding MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 2bcfd76..8a21b35 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -396,8 +396,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { // Constant-offset addressing. Disp += CI->getSExtValue() * S; } else if (IndexReg == 0 && - (!AM.GV || - !getTargetMachine()->symbolicAddressesAreRIPRel()) && + (!AM.GV || !Subtarget->isPICStyleRIPRel()) && (S == 1 || S == 2 || S == 4 || S == 8)) { // Scaled-index addressing. Scale = S; @@ -432,7 +431,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { return false; // RIP-relative addresses can't have additional register operands. - if (getTargetMachine()->symbolicAddressesAreRIPRel() && + if (Subtarget->isPICStyleRIPRel() && (AM.Base.Reg != 0 || AM.IndexReg != 0)) return false; @@ -443,6 +442,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { // Set up the basic address. AM.GV = GV; + if (!isCall && TM.getRelocationModel() == Reloc::PIC_ && !Subtarget->is64Bit()) @@ -481,12 +481,16 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { // Prevent loading GV stub multiple times in same MBB. LocalValueMap[V] = AM.Base.Reg; + } else if (Subtarget->isPICStyleRIPRel()) { + // Use rip-relative addressing if we can. 
+ AM.Base.Reg = X86::RIP; } + return true; } // If all else fails, try to materialize the value in a register. - if (!AM.GV || !getTargetMachine()->symbolicAddressesAreRIPRel()) { + if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { if (AM.Base.Reg == 0) { AM.Base.Reg = getRegForValue(V); return AM.Base.Reg != 0; @@ -1140,12 +1144,10 @@ bool X86FastISel::X86SelectCall(Instruction *I) { return false; unsigned CalleeOp = 0; GlobalValue *GV = 0; - if (CalleeAM.Base.Reg != 0) { - assert(CalleeAM.GV == 0); - CalleeOp = CalleeAM.Base.Reg; - } else if (CalleeAM.GV != 0) { - assert(CalleeAM.GV != 0); + if (CalleeAM.GV != 0) { GV = CalleeAM.GV; + } else if (CalleeAM.Base.Reg != 0) { + CalleeOp = CalleeAM.Base.Reg; } else return false; @@ -1493,15 +1495,22 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { // x86-32 PIC requires a PIC base register for constant pools. unsigned PICBase = 0; - if (TM.getRelocationModel() == Reloc::PIC_ && - !Subtarget->is64Bit()) - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + unsigned char OpFlag = 0; + if (TM.getRelocationModel() == Reloc::PIC_) { + if (Subtarget->isPICStyleStub()) { + OpFlag = X86II::MO_PIC_BASE_OFFSET; + PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + } else if (Subtarget->isPICStyleGOT()) { + OpFlag = X86II::MO_GOTOFF; + PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + } + } // Create the load from the constant pool. 
unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align); unsigned ResultReg = createResultReg(RC); - addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), MCPOffset, - PICBase); + addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), + MCPOffset, PICBase, OpFlag); return ResultReg; } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 9cedafc..1336177 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -65,7 +65,6 @@ namespace { int FrameIndex; } Base; - bool isRIPRel; // RIP as base? unsigned Scale; SDValue IndexReg; int32_t Disp; @@ -75,15 +74,35 @@ namespace { const char *ES; int JT; unsigned Align; // CP alignment. + unsigned char SymbolFlags; // X86II::MO_* X86ISelAddressMode() - : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0), - Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0) { + : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), + Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) { } bool hasSymbolicDisplacement() const { return GV != 0 || CP != 0 || ES != 0 || JT != -1; } + + bool hasBaseOrIndexReg() const { + return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0; + } + + /// isRIPRelative - Return true if this addressing mode is already RIP + /// relative. 
+ bool isRIPRelative() const { + if (BaseType != RegBase) return false; + if (RegisterSDNode *RegNode = + dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode())) + return RegNode->getReg() == X86::RIP; + return false; + } + + void setBaseReg(SDValue Reg) { + BaseType = RegBase; + Base.Reg = Reg; + } void dump() { cerr << "X86ISelAddressMode " << this << "\n"; @@ -91,7 +110,7 @@ namespace { if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); else cerr << "nul"; cerr << " Base.FrameIndex " << Base.FrameIndex << "\n"; - cerr << "isRIPRel " << isRIPRel << " Scale" << Scale << "\n"; + cerr << " Scale" << Scale << "\n"; cerr << "IndexReg "; if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); else cerr << "nul"; @@ -200,14 +219,15 @@ namespace { // These are 32-bit even in 64-bit mode since RIP relative offset // is 32-bit. if (AM.GV) - Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp); + Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp, + AM.SymbolFlags); else if (AM.CP) Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, - AM.Align, AM.Disp); + AM.Align, AM.Disp, AM.SymbolFlags); else if (AM.ES) - Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32); + Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); else if (AM.JT != -1) - Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32); + Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); else Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); @@ -683,61 +703,80 @@ bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) { return true; } +/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes +/// into an addressing mode. These wrap things that will resolve down into a +/// symbol reference. If no match is possible, this returns true, otherwise it +/// returns false. 
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { - bool SymbolicAddressesAreRIPRel = - getTargetMachine().symbolicAddressesAreRIPRel(); - bool is64Bit = Subtarget->is64Bit(); - DOUT << "Wrapper: 64bit " << is64Bit; - DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n"; - - // Under X86-64 non-small code model, GV (and friends) are 64-bits. - if (is64Bit && (TM.getCodeModel() != CodeModel::Small)) - return true; - - // Base and index reg must be 0 in order to use rip as base. - bool canUsePICRel = !AM.Base.Reg.getNode() && !AM.IndexReg.getNode(); - if (is64Bit && !canUsePICRel && SymbolicAddressesAreRIPRel) - return true; - + // If the addressing mode already has a symbol as the displacement, we can + // never match another symbol. if (AM.hasSymbolicDisplacement()) return true; - // If value is available in a register both base and index components have - // been picked, we can't fit the result available in the register in the - // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement. SDValue N0 = N.getOperand(0); - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { - uint64_t Offset = G->getOffset(); - if (!is64Bit || isInt32(AM.Disp + Offset)) { - GlobalValue *GV = G->getGlobal(); - bool isRIPRel = SymbolicAddressesAreRIPRel; - if (N0.getOpcode() == llvm::ISD::TargetGlobalTLSAddress) { - TLSModel::Model model = - getTLSModel (GV, TM.getRelocationModel()); - if (is64Bit && model == TLSModel::InitialExec) - isRIPRel = true; - } - AM.GV = GV; - AM.Disp += Offset; - AM.isRIPRel = isRIPRel; - return false; - } - } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { - uint64_t Offset = CP->getOffset(); - if (!is64Bit || isInt32(AM.Disp + Offset)) { + + // Handle X86-64 rip-relative addresses. We check this before checking direct + // folding because RIP is preferable to non-RIP accesses. 
+ if (Subtarget->is64Bit() && + // Under X86-64 non-small code model, GV (and friends) are 64-bits, so + // they cannot be folded into immediate fields. + // FIXME: This can be improved for kernel and other models? + TM.getCodeModel() == CodeModel::Small && + + // Base and index reg must be 0 in order to use %rip as base and lowering + // must allow RIP. + !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { + int64_t Offset = AM.Disp + G->getOffset(); + if (!isInt32(Offset)) return true; + AM.GV = G->getGlobal(); + AM.Disp = Offset; + AM.SymbolFlags = G->getTargetFlags(); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { + int64_t Offset = AM.Disp + CP->getOffset(); + if (!isInt32(Offset)) return true; AM.CP = CP->getConstVal(); AM.Align = CP->getAlignment(); - AM.Disp += Offset; - AM.isRIPRel = SymbolicAddressesAreRIPRel; - return false; + AM.Disp = Offset; + AM.SymbolFlags = CP->getTargetFlags(); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { + AM.ES = S->getSymbol(); + AM.SymbolFlags = S->getTargetFlags(); + } else { + JumpTableSDNode *J = cast<JumpTableSDNode>(N0); + AM.JT = J->getIndex(); + AM.SymbolFlags = J->getTargetFlags(); } - } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) { - AM.ES = S->getSymbol(); - AM.isRIPRel = SymbolicAddressesAreRIPRel; + + if (N.getOpcode() == X86ISD::WrapperRIP) + AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); return false; - } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { - AM.JT = J->getIndex(); - AM.isRIPRel = SymbolicAddressesAreRIPRel; + } + + // Handle the case when globals fit in our immediate field: This is true for + // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit + // mode, this results in a non-RIP-relative computation. 
+ if (!Subtarget->is64Bit() || + (TM.getCodeModel() == CodeModel::Small && + TM.getRelocationModel() == Reloc::Static)) { + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { + AM.GV = G->getGlobal(); + AM.Disp += G->getOffset(); + AM.SymbolFlags = G->getTargetFlags(); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { + AM.CP = CP->getConstVal(); + AM.Align = CP->getAlignment(); + AM.Disp += CP->getOffset(); + AM.SymbolFlags = CP->getTargetFlags(); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { + AM.ES = S->getSymbol(); + AM.SymbolFlags = S->getTargetFlags(); + } else { + JumpTableSDNode *J = cast<JumpTableSDNode>(N0); + AM.JT = J->getIndex(); + AM.SymbolFlags = J->getTargetFlags(); + } return false; } @@ -756,12 +795,19 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, if (Depth > 5) return MatchAddressBase(N, AM); + // If this is already a %rip relative address, we can only merge immediates + // into it. Instead of handling this in every case, we handle it here. // RIP relative addressing: %rip + 32-bit displacement! - if (AM.isRIPRel) { - if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) { - uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); - if (!is64Bit || isInt32(AM.Disp + Val)) { - AM.Disp += Val; + if (AM.isRIPRelative()) { + // FIXME: JumpTable and ExternalSymbol address currently don't like + // displacements. It isn't very important, but this should be fixed for + // consistency. 
+ if (!AM.ES && AM.JT != -1) return true; + + if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) { + int64_t Val = AM.Disp + Cst->getSExtValue(); + if (isInt32(Val)) { + AM.Disp = Val; return false; } } @@ -785,6 +831,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, break; case X86ISD::Wrapper: + case X86ISD::WrapperRIP: if (!MatchWrapper(N, AM)) return false; break; @@ -804,7 +851,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, break; case ISD::SHL: - if (AM.IndexReg.getNode() != 0 || AM.Scale != 1 || AM.isRIPRel) + if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; if (ConstantSDNode @@ -845,8 +892,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, // X*[3,5,9] -> X+X*[2,4,8] if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.getNode() == 0 && - AM.IndexReg.getNode() == 0 && - !AM.isRIPRel) { + AM.IndexReg.getNode() == 0) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || @@ -895,7 +941,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, break; } // Test if the index field is free for use. - if (AM.IndexReg.getNode() || AM.isRIPRel) { + if (AM.IndexReg.getNode() || AM.isRIPRelative()) { AM = Backup; break; } @@ -966,8 +1012,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, // the add. if (AM.BaseType == X86ISelAddressMode::RegBase && !AM.Base.Reg.getNode() && - !AM.IndexReg.getNode() && - !AM.isRIPRel) { + !AM.IndexReg.getNode()) { AM.Base.Reg = N.getNode()->getOperand(0); AM.IndexReg = N.getNode()->getOperand(1); AM.Scale = 1; @@ -1006,9 +1051,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, // Scale must not be used already. if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; - // Not when RIP is used as the base. 
- if (AM.isRIPRel) break; - SDValue X = Shift.getOperand(0); ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1)); ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); @@ -1130,7 +1172,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Is the base register already occupied? if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) { // If so, check to see if the scale index register is set. - if (AM.IndexReg.getNode() == 0 && !AM.isRIPRel) { + if (AM.IndexReg.getNode() == 0) { AM.IndexReg = N; AM.Scale = 1; return false; @@ -1157,7 +1199,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, if (AvoidDupAddrCompute && !N.hasOneUse()) { unsigned Opcode = N.getOpcode(); if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex && - Opcode != X86ISD::Wrapper) { + Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) { // If we are able to fold N into addressing mode, then we'll allow it even // if N has multiple uses. In general, addressing computation is used as // addresses by all of its uses. But watch out for CopyToReg uses, that @@ -1307,7 +1349,8 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, AM.GV = GA->getGlobal(); AM.Disp += GA->getOffset(); AM.Base.Reg = CurDAG->getRegister(0, N.getValueType()); - + AM.SymbolFlags = GA->getTargetFlags(); + if (N.getValueType() == MVT::i32) { AM.Scale = 1; AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); @@ -1687,7 +1730,8 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled // somehow, just ignore it. 
- if (N2.getOpcode() != X86ISD::Wrapper) { + if (N2.getOpcode() != X86ISD::Wrapper && + N2.getOpcode() != X86ISD::WrapperRIP) { ReplaceUses(N.getValue(0), Chain); return NULL; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8d0ea66..9614e69 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -19,6 +19,7 @@ #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" @@ -4311,21 +4312,102 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - // FIXME there isn't really any debug info here, should come from the parent - DebugLoc dl = CP->getDebugLoc(); + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg. + unsigned char OpFlag = 0; + unsigned WrapperKind = X86ISD::Wrapper; + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + if (Subtarget->isPICStyleStub()) + OpFlag = X86II::MO_PIC_BASE_OFFSET; + else if (Subtarget->isPICStyleGOT()) + OpFlag = X86II::MO_GOTOFF; + else if (Subtarget->isPICStyleRIPRel() && + getTargetMachine().getCodeModel() == CodeModel::Small) + WrapperKind = X86ISD::WrapperRIP; + } + SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(), - CP->getAlignment()); - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); + CP->getAlignment(), + CP->getOffset(), OpFlag); + DebugLoc DL = CP->getDebugLoc(); + Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + // With PIC, the address is actually $g + Offset. 
+ if (OpFlag) { + Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc::getUnknownLoc(), getPointerTy()), + Result); + } + + return Result; +} + +SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg. + unsigned char OpFlag = 0; + unsigned WrapperKind = X86ISD::Wrapper; + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + if (Subtarget->isPICStyleStub()) + OpFlag = X86II::MO_PIC_BASE_OFFSET; + else if (Subtarget->isPICStyleGOT()) + OpFlag = X86II::MO_GOTOFF; + else if (Subtarget->isPICStyleRIPRel()) + WrapperKind = X86ISD::WrapperRIP; + } + + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), + OpFlag); + DebugLoc DL = JT->getDebugLoc(); + Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + + // With PIC, the address is actually $g + Offset. + if (OpFlag) { + Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc::getUnknownLoc(), getPointerTy()), + Result); + } + + return Result; +} + +SDValue +X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { + const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg. 
+ unsigned char OpFlag = 0; + unsigned WrapperKind = X86ISD::Wrapper; + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + if (Subtarget->isPICStyleStub()) + OpFlag = X86II::MO_PIC_BASE_OFFSET; + else if (Subtarget->isPICStyleGOT()) + OpFlag = X86II::MO_GOTOFF; + else if (Subtarget->isPICStyleRIPRel()) + WrapperKind = X86ISD::WrapperRIP; + } + + SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag); + + DebugLoc DL = Op.getDebugLoc(); + Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + + // With PIC, the address is actually $g + Offset. if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && !Subtarget->isPICStyleRIPRel()) { - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), + Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(), getPointerTy()), Result); } - + return Result; } @@ -4343,9 +4425,29 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) { Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); Offset = 0; - } else - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0); - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); + } else { + unsigned char OpFlags = 0; + + if (Subtarget->isPICStyleRIPRel() && + getTargetMachine().getRelocationModel() != Reloc::Static) { + if (ExtraLoadRequired) + OpFlags = X86II::MO_GOTPCREL; + } else if (Subtarget->isPICStyleGOT() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) { + if (ExtraLoadRequired) + OpFlags = X86II::MO_GOT; + else + OpFlags = X86II::MO_GOTOFF; + } + + Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); + } + + if (Subtarget->isPICStyleRIPRel() && + getTargetMachine().getCodeModel() == CodeModel::Small) + Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result); + else + Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); // With 
PIC, the address is actually $g + Offset. if (IsPic && !Subtarget->isPICStyleRIPRel()) { @@ -4381,12 +4483,14 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, - SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg) { + SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg, + unsigned char OperandFlags) { SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); DebugLoc dl = GA->getDebugLoc(); SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), - GA->getOffset()); + GA->getOffset(), + OperandFlags); if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3); @@ -4410,14 +4514,15 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, PtrVT), InFlag); InFlag = Chain.getValue(1); - return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX); + return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, const MVT PtrVT) { - return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX); + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, + X86::RAX, X86II::MO_TLSGD); } // Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or @@ -4435,11 +4540,26 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, NULL, 0); + unsigned char OperandFlags = 0; + // Most TLS accesses are not RIP relative, even on x86-64. One exception is + // initialexec. + unsigned WrapperKind = X86ISD::Wrapper; + if (model == TLSModel::LocalExec) { + OperandFlags = is64Bit ? 
X86II::MO_TPOFF : X86II::MO_NTPOFF; + } else if (is64Bit) { + assert(model == TLSModel::InitialExec); + OperandFlags = X86II::MO_GOTTPOFF; + WrapperKind = X86ISD::WrapperRIP; + } else { + assert(model == TLSModel::InitialExec); + OperandFlags = X86II::MO_INDNTPOFF; + } + // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), - GA->getOffset()); - SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); + GA->getOffset(), OperandFlags); + SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, @@ -4457,72 +4577,33 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); - GlobalValue *GV = GA->getGlobal(); - TLSModel::Model model = - getTLSModel (GV, getTargetMachine().getRelocationModel()); - if (Subtarget->is64Bit()) { - switch (model) { - case TLSModel::GeneralDynamic: - case TLSModel::LocalDynamic: // not implemented + const GlobalValue *GV = GA->getGlobal(); + + // If GV is an alias then use the aliasee for determining + // thread-localness. 
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GV = GA->resolveAliasedGlobal(false); + + TLSModel::Model model = getTLSModel(GV, + getTargetMachine().getRelocationModel()); + + switch (model) { + case TLSModel::GeneralDynamic: + case TLSModel::LocalDynamic: // not implemented + if (Subtarget->is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); - - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, true); - } - } else { - switch (model) { - case TLSModel::GeneralDynamic: - case TLSModel::LocalDynamic: // not implemented - return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); - - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, false); - } + return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); + + case TLSModel::InitialExec: + case TLSModel::LocalExec: + return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, + Subtarget->is64Bit()); } + assert(0 && "Unreachable"); return SDValue(); } -SDValue -X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { - // FIXME there isn't really any debug info here - DebugLoc dl = Op.getDebugLoc(); - const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); - SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); - // With PIC, the address is actually $g + Offset. 
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && - !Subtarget->isPICStyleRIPRel()) { - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - DebugLoc::getUnknownLoc(), - getPointerTy()), - Result); - } - - return Result; -} - -SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - // FIXME there isn't really any debug into here - DebugLoc dl = JT->getDebugLoc(); - SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); - // With PIC, the address is actually $g + Offset. - if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && - !Subtarget->isPICStyleRIPRel()) { - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - DebugLoc::getUnknownLoc(), - getPointerTy()), - Result); - } - - return Result; -} /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and /// take a 2 x i32 value to shift plus a shift amount. @@ -6779,6 +6860,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; case X86ISD::Wrapper: return "X86ISD::Wrapper"; + case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP"; case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 063913f..472ba4c 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -45,7 +45,8 @@ def lea64_32mem : Operand<i32> { // Complex Pattern Definitions. 
// def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr", - [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper], + [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper, + X86WrapperRIP], []>; def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", @@ -1418,6 +1419,9 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), def : Pat<(i64 (X86Wrapper texternalsym:$dst)), (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>; +// If we have small model and -static mode, it is safe to store global addresses +// directly as immediates. FIXME: This is really a hack, the 'imm' predicate +// should handle this sort of thing. def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst), (MOV64mi32 addr:$dst, tconstpool:$src)>, Requires<[SmallCode, IsStatic]>; @@ -1431,6 +1435,23 @@ def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), (MOV64mi32 addr:$dst, texternalsym:$src)>, Requires<[SmallCode, IsStatic]>; +// If we have small model and -static mode, it is safe to store global addresses +// directly as immediates. FIXME: This is really a hack, the 'imm' predicate +// should handle this sort of thing. +def : Pat<(store (i64 (X86WrapperRIP tconstpool:$src)), addr:$dst), + (MOV64mi32 addr:$dst, tconstpool:$src)>, + Requires<[SmallCode, IsStatic]>; +def : Pat<(store (i64 (X86WrapperRIP tjumptable:$src)), addr:$dst), + (MOV64mi32 addr:$dst, tjumptable:$src)>, + Requires<[SmallCode, IsStatic]>; +def : Pat<(store (i64 (X86WrapperRIP tglobaladdr:$src)), addr:$dst), + (MOV64mi32 addr:$dst, tglobaladdr:$src)>, + Requires<[SmallCode, IsStatic]>; +def : Pat<(store (i64 (X86WrapperRIP texternalsym:$src)), addr:$dst), + (MOV64mi32 addr:$dst, texternalsym:$src)>, + Requires<[SmallCode, IsStatic]>; + + // Calls // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. 
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 39504cd..b50dd65 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -157,10 +157,10 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { /// inline const MachineInstrBuilder & addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, - unsigned GlobalBaseReg = 0) { + unsigned GlobalBaseReg, unsigned char OpFlags) { //FIXME: factor this return MIB.addReg(GlobalBaseReg).addImm(1).addReg(0) - .addConstantPoolIndex(CPI).addReg(0); + .addConstantPoolIndex(CPI, 0, OpFlags).addReg(0); } } // End llvm namespace diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 8a9b7c9..21f71ec 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -18,6 +18,7 @@ #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" +#include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -28,7 +29,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetAsmInfo.h" - using namespace llvm; namespace { @@ -781,6 +781,29 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) { return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false); } + +/// CanRematLoadWithDispOperand - Return true if a load with the specified +/// operand is a candidate for remat: for this to be true we need to know that +/// the load will always return the same value, even if moved. +static bool CanRematLoadWithDispOperand(const MachineOperand &MO, + X86TargetMachine &TM) { + // Loads from constant pool entries can be remat'd. + if (MO.isCPI()) return true; + + // We can remat globals in some cases. 
+ if (MO.isGlobal()) { + // If this is a load of a stub, not of the global, we can remat it. This + // access will always return the address of the global. + if (isGVStub(MO.getGlobal(), TM)) + return true; + + // If the global itself is constant, we can remat the load. + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) + if (GV->isConstant()) + return true; + } + return false; +} bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { @@ -802,11 +825,9 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() && MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - (MI->getOperand(4).isCPI() || - (MI->getOperand(4).isGlobal() && - isGVStub(MI->getOperand(4).getGlobal(), TM)))) { + CanRematLoadWithDispOperand(MI->getOperand(4), TM)) { unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == 0) + if (BaseReg == 0 || BaseReg == X86::RIP) return true; // Allow re-materialization of PIC load. if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) @@ -3190,9 +3211,8 @@ unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_); bool Is64BitMode = TM.getSubtargetImpl()->is64Bit(); unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode); - if (Desc.getOpcode() == X86::MOVPC32r) { + if (Desc.getOpcode() == X86::MOVPC32r) Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode); - } return Size; } @@ -3220,17 +3240,17 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { const TargetInstrInfo *TII = TM.getInstrInfo(); // Operand of MovePCtoStack is completely ignored by asm printer. It's // only used in JIT code emission as displacement to pc. 
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC) - .addImm(0); + BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); // If we're using vanilla 'GOT' PIC style, we should use relative addressing - // not to pc, but to _GLOBAL_ADDRESS_TABLE_ external + // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. if (TM.getRelocationModel() == Reloc::PIC_ && TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) { - GlobalBaseReg = - RegInfo.createVirtualRegister(X86::GR32RegisterClass); + GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); + // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) - .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_"); + .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 0, + X86II::MO_GOT_ABSOLUTE_ADDRESS); } else { GlobalBaseReg = PC; } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index e09769e..83f0194 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -71,7 +71,86 @@ namespace X86 { namespace X86II { enum { //===------------------------------------------------------------------===// - // Instruction types. These are the standard/most common forms for X86 + // X86 Specific MachineOperand flags. + + MO_NO_FLAG = 0, + + /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a + /// relocation of: + /// SYMBOL_LABEL + [. - PICBASELABEL] + MO_GOT_ABSOLUTE_ADDRESS = 1, + + /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the + /// immediate should get the value of the symbol minus the PIC base label: + /// SYMBOL_LABEL - PICBASELABEL + MO_PIC_BASE_OFFSET = 2, + + /// MO_GOT - On a symbol operand this indicates that the immediate is the + /// offset to the GOT entry for the symbol name from the base of the GOT. + /// + /// See the X86-64 ELF ABI supplement for more details. 
+ /// SYMBOL_LABEL @GOT + MO_GOT = 3, + + /// MO_GOTOFF - On a symbol operand this indicates that the immediate is + /// the offset to the location of the symbol name from the base of the GOT. + /// + /// See the X86-64 ELF ABI supplement for more details. + /// SYMBOL_LABEL @GOTOFF + MO_GOTOFF = 4, + + /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is + /// offset to the GOT entry for the symbol name from the current code + /// location. + /// + /// See the X86-64 ELF ABI supplement for more details. + /// SYMBOL_LABEL @GOTPCREL + MO_GOTPCREL = 5, + + /// MO_PLT - On a symbol operand this indicates that the immediate is + /// offset to the PLT entry of symbol name from the current code location. + /// + /// See the X86-64 ELF ABI supplement for more details. + /// SYMBOL_LABEL @PLT + MO_PLT = 6, + + /// MO_TLSGD - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TLSGD + MO_TLSGD = 7, + + /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @GOTTPOFF + MO_GOTTPOFF = 8, + + /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @INDNTPOFF + MO_INDNTPOFF = 9, + + /// MO_TPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. + /// SYMBOL_LABEL @TPOFF + MO_TPOFF = 10, + + /// MO_NTPOFF - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// See 'ELF Handling for Thread-Local Storage' for more details. 
+ /// SYMBOL_LABEL @NTPOFF + MO_NTPOFF = 11, + + //===------------------------------------------------------------------===// + // Instruction encodings. These are the standard/most common forms for X86 // instructions. // diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 2d8f55f..a6b0880 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -50,9 +50,9 @@ def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>; def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>; -def SDT_X86CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; -def SDT_X86CallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; +def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; @@ -236,6 +236,10 @@ def HasSSE3 : Predicate<"Subtarget->hasSSE3()">; def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; +def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; +def HasAVX : Predicate<"Subtarget->hasAVX()">; +def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; +def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 43fadc2..b79a006 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -481,11 +481,11 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), // Misc. 
let Uses = [EDI] in -def MMX_MASKMOVQ : MMXI<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask), +def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>; let Uses = [RDI] in -def MMX_MASKMOVQ64: MMXI64<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask), +def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index d552cb3..996baa0 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -439,7 +439,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, def GR64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, - RBX, R14, R15, R12, R13, RBP, RSP]> { + RBX, R14, R15, R12, R13, RBP, RSP, RIP]> { let SubRegClassList = [GR8, GR8, GR16, GR32]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; @@ -453,9 +453,9 @@ def GR64 : RegisterClass<"X86", [i64], 64, if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? 
- return end()-2; // If so, don't allocate RSP or RBP + return end()-3; // If so, don't allocate RIP, RSP or RBP else - return end()-1; // If not, just don't allocate RSP + return end()-2; // If not, just don't allocate RIP or RSP } }]; } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 56983ce..8506fa6 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -207,6 +207,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; + + HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); + HasAVX = ((ECX >> 28) & 0x1); + if (IsIntel || IsAMD) { // Determine if bit test memory instructions are slow. unsigned Family = 0; @@ -217,6 +221,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); HasX86_64 = (EDX >> 29) & 0x1; HasSSE4A = IsAMD && ((ECX >> 6) & 0x1); + HasFMA4 = IsAMD && ((ECX >> 16) & 0x1); } } @@ -342,6 +347,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) , X86SSELevel(NoMMXSSE) , X863DNowLevel(NoThreeDNow) , HasX86_64(false) + , HasSSE4A(false) + , HasAVX(false) + , HasFMA3(false) + , HasFMA4(false) , IsBTMemSlow(false) , DarwinVers(0) , IsLinux(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 694b0eb..f4f6cce 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -64,12 +64,21 @@ protected: /// bool HasX86_64; - /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. - bool IsBTMemSlow; - /// HasSSE4A - True if the processor supports SSE4A instructions. 
+ bool HasSSE4A; + /// HasAVX - Target has AVX instructions + bool HasAVX; + + /// HasFMA3 - Target has 3-operand fused multiply-add + bool HasFMA3; + + /// HasFMA4 - Target has 4-operand fused multiply-add + bool HasFMA4; + + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. + bool IsBTMemSlow; + /// DarwinVers - Nonzero if this is a darwin platform: the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. unsigned char DarwinVers; // Is any darwin-x86 platform. @@ -133,6 +142,9 @@ public: bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } + bool hasAVX() const { return HasAVX; } + bool hasFMA3() const { return HasFMA3; } + bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 53c46c3..67dcd01 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -36,10 +36,8 @@ X("x86", "32-bit X86: Pentium-Pro and above"); static RegisterTarget<X86_64TargetMachine> Y("x86-64", "64-bit X86: EM64T and AMD64"); -// Force static initialization when called from llvm/InitializeAllTargets.h -namespace llvm { - void InitializeX86Target() { } -} +// Force static initialization. +extern "C" void LLVMInitializeX86Target() { } // No assembler printer by default X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0; @@ -222,7 +220,8 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM, // On Darwin, override 64-bit static relocation to pic_ since the // assembler doesn't support it. 
if (DefRelocModel == Reloc::Static && - Subtarget.isTargetDarwin() && Subtarget.is64Bit()) + Subtarget.isTargetDarwin() && Subtarget.is64Bit() && + getCodeModel() == CodeModel::Small) setRelocationModel(Reloc::PIC_); assert(AsmPrinterCtor && "AsmPrinter was not linked in"); @@ -319,11 +318,3 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, return false; } -/// symbolicAddressesAreRIPRel - Return true if symbolic addresses are -/// RIP-relative on this machine, taking into consideration the relocation -/// model and subtarget. RIP-relative addresses cannot have a separate -/// base or index register. -bool X86TargetMachine::symbolicAddressesAreRIPRel() const { - return getRelocationModel() != Reloc::Static && - Subtarget.isPICStyleRIPRel(); -} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index ecc1d39..ba73ca8 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -91,12 +91,6 @@ public: virtual bool addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool DumpAsm, JITCodeEmitter &JCE); - - /// symbolicAddressesAreRIPRel - Return true if symbolic addresses are - /// RIP-relative on this machine, taking into consideration the relocation - /// model and subtarget. RIP-relative addresses cannot have a separate - /// base or index register. - bool symbolicAddressesAreRIPRel() const; }; /// X86_32TargetMachine - X86 32-bit target machine. |