diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
50 files changed, 2447 insertions, 784 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index b6a0835..61d23ce 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -17,16 +17,12 @@ #include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "PPCGenAsmWriter.inc" -StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } @@ -94,7 +90,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, unsigned Code = MI->getOperand(OpNo).getImm(); if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { - default: assert(0 && "Invalid predicate"); case PPC::PRED_ALWAYS: return; // Don't print anything for always. case PPC::PRED_LT: O << "lt"; return; case PPC::PRED_LE: O << "le"; return; @@ -175,7 +170,7 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo, unsigned CCReg = MI->getOperand(OpNo).getReg(); unsigned RegNo; switch (CCReg) { - default: assert(0 && "Unknown CR register"); + default: llvm_unreachable("Unknown CR register"); case PPC::CR0: RegNo = 0; break; case PPC::CR1: RegNo = 1; break; case PPC::CR2: RegNo = 2; break; diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 4ed4b76..73fd534 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -1,4 +1,4 @@ -//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===// +//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -24,8 +24,9 @@ class PPCInstPrinter : public MCInstPrinter { // 0 -> AIX, 1 -> Darwin. unsigned SyntaxVariant; public: - PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant) - : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {} + PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, unsigned syntaxVariant) + : MCInstPrinter(MAI, MII, MRI), SyntaxVariant(syntaxVariant) {} bool isDarwinSyntax() const { return SyntaxVariant == 1; @@ -33,9 +34,6 @@ public: virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; - - static const char *getInstructionName(unsigned Opcode); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 9f2fd6d..48de583 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmBackend.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectWriter.h" @@ -57,13 +58,6 @@ public: MCValue Target, uint64_t &FixedValue) {} }; -class PPCELFObjectWriter : public MCELFObjectTargetWriter { -public: - PPCELFObjectWriter(bool Is64Bit, Triple::OSType OSType, uint16_t EMachine, - bool HasRelocationAddend, bool isLittleEndian) - : MCELFObjectTargetWriter(Is64Bit, OSType, EMachine, HasRelocationAddend) {} -}; - class PPCAsmBackend : public MCAsmBackend { const Target &TheTarget; public: @@ -80,33 +74,42 @@ public: { "fixup_ppc_ha16", 16, 16, 0 }, { "fixup_ppc_lo14", 16, 14, 0 } }; - + if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); - + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } - - bool MayNeedRelaxation(const MCInst &Inst) const { + + bool mayNeedRelaxation(const MCInst &Inst) const { // FIXME. return false; } - - void RelaxInstruction(const MCInst &Inst, MCInst &Res) const { + + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { // FIXME. - assert(0 && "RelaxInstruction() unimplemented"); + llvm_unreachable("relaxInstruction() unimplemented"); } - - bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const { + + + void relaxInstruction(const MCInst &Inst, MCInst &Res) const { + // FIXME. + llvm_unreachable("relaxInstruction() unimplemented"); + } + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { // FIXME: Zero fill for now. That's not right, but at least will get the // section size right. for (uint64_t i = 0; i != Count; ++i) OW->Write8(0); return true; - } - + } + unsigned getPointerSize() const { StringRef Name = TheTarget.getName(); if (Name == "ppc64") return 8; @@ -122,12 +125,12 @@ namespace { class DarwinPPCAsmBackend : public PPCAsmBackend { public: DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { } - - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { - assert(0 && "UNIMP"); + llvm_unreachable("UNIMP"); } - + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; return createMachObjectWriter(new PPCMachObjectWriter( @@ -137,19 +140,19 @@ namespace { object::mach::CSPPC_ALL), OS, /*IsLittleEndian=*/false); } - + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { return false; } }; class ELFPPCAsmBackend : public PPCAsmBackend { - Triple::OSType OSType; + uint8_t OSABI; public: - ELFPPCAsmBackend(const Target &T, Triple::OSType OSType) : - PPCAsmBackend(T), OSType(OSType) { } - - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + ELFPPCAsmBackend(const Target &T, uint8_t OSABI) : + PPCAsmBackend(T), OSABI(OSABI) { } + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. @@ -162,17 +165,12 @@ namespace { for (unsigned i = 0; i != 4; ++i) Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); } - + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; - return createELFObjectWriter(new PPCELFObjectWriter( - /*Is64Bit=*/is64, - OSType, - is64 ? ELF::EM_PPC64 : ELF::EM_PPC, - /*addend*/ true, /*isLittleEndian*/ false), - OS, /*IsLittleEndian=*/false); + return createPPCELFObjectWriter(OS, is64, OSABI); } - + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { return false; } @@ -187,5 +185,6 @@ MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) { if (Triple(TT).isOSDarwin()) return new DarwinPPCAsmBackend(T); - return new ELFPPCAsmBackend(T, Triple(TT).getOS()); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + return new ELFPPCAsmBackend(T, OSABI); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h index 369bbdc..9c975c0 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h @@ -1,4 +1,4 @@ -//===-- PPCBaseInfo.h - Top level definitions for PPC -------- --*- C++ -*-===// +//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp new file mode 100644 index 0000000..a197981 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -0,0 +1,103 @@ +//===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { + class PPCELFObjectWriter : public MCELFObjectTargetWriter { + public: + PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI); + + virtual ~PPCELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; + virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); + }; +} + +PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) + : MCELFObjectTargetWriter(Is64Bit, OSABI, + Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC, + /*HasRelocationAddend*/ true) {} + +PPCELFObjectWriter::~PPCELFObjectWriter() { +} + +unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + // determine the type of the relocation + unsigned Type; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented"); + case PPC::fixup_ppc_br24: + Type = ELF::R_PPC_REL24; + break; + case FK_PCRel_4: + Type = ELF::R_PPC_REL32; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case PPC::fixup_ppc_br24: + Type = ELF::R_PPC_ADDR24; + break; + case PPC::fixup_ppc_brcond14: + Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_ + break; + case PPC::fixup_ppc_ha16: + Type = ELF::R_PPC_ADDR16_HA; + break; + case PPC::fixup_ppc_lo16: + Type = ELF::R_PPC_ADDR16_LO; + break; + case PPC::fixup_ppc_lo14: + Type = ELF::R_PPC_ADDR14; + break; + case FK_Data_4: + Type = ELF::R_PPC_ADDR32; + break; + case FK_Data_2: + Type = ELF::R_PPC_ADDR16; + break; + } + } + return Type; +} + +void PPCELFObjectWriter:: +adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { + switch ((unsigned)Fixup.getKind()) { + case PPC::fixup_ppc_ha16: + case PPC::fixup_ppc_lo16: + RelocOffset += 2; + break; + default: + break; + } +} + +MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); +} diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index e9424d8..245b457 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- PPCMCAsmInfo.cpp - PPC asm properties -------------------*- C++ -*-===// +//===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===// // // The LLVM Compiler Infrastructure // @@ -14,6 +14,8 @@ #include "PPCMCAsmInfo.h" using namespace llvm; +void PPCMCAsmInfoDarwin::anchor() { } + PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { if (is64Bit) PointerSize = 8; @@ -30,6 +32,8 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { SupportsDebugInformation= true; // Debug information. } +void PPCLinuxMCAsmInfo::anchor() { } + PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { if (is64Bit) PointerSize = 8; diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 96ae6fb..7b4ed9f 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- PPCMCAsmInfo.h - PPC asm properties -----------------*- C++ -*--====// +//===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -18,11 +18,15 @@ namespace llvm { - struct PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { + class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); + public: explicit PPCMCAsmInfoDarwin(bool is64Bit); }; - struct PPCLinuxMCAsmInfo : public MCAsmInfo { + class PPCLinuxMCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: explicit PPCLinuxMCAsmInfo(bool is64Bit); }; diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 262f97c3..5a6827f 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -57,7 +57,7 @@ public: // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. - unsigned getBinaryCodeForInstr(const MCInst &MI, + uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index d5c8a9e..6568e82 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===// +//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===// // // The LLVM Compiler Infrastructure // @@ -20,6 +20,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC @@ -76,7 +77,8 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { @@ -86,7 +88,7 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, else RM = Reloc::Static; } - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } @@ -106,8 +108,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createPPCMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { - return new PPCInstPrinter(MAI, SyntaxVariant); + return new PPCInstPrinter(MAI, MII, MRI, SyntaxVariant); } extern "C" void LLVMInitializePowerPCTargetMC() { diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index e5bf2a9..b7fa064 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -14,14 +14,18 @@ #ifndef PPCMCTARGETDESC_H #define PPCMCTARGETDESC_H +#include "llvm/Support/DataTypes.h" + namespace llvm { class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; +class MCObjectWriter; class MCSubtargetInfo; class Target; class StringRef; +class raw_ostream; extern Target ThePPC32Target; extern Target ThePPC64Target; @@ -31,7 +35,11 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx); MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT); - + +/// createPPCELFObjectWriter - Construct an PPC ELF object writer. +MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, + bool Is64Bit, + uint8_t OSABI); } // End llvm namespace // Defines symbolic names for PowerPC registers. This defines a mapping from diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h index 5dc1863..24a7178 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.h +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h @@ -25,14 +25,11 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; - class formatted_raw_ostream; class JITCodeEmitter; - class Target; class MachineInstr; class AsmPrinter; class MCInst; - class TargetMachine; - + FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td index 2d5d302..c554d39 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.td +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td @@ -1,10 +1,10 @@ -//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===// -// +//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This is the top level entry point for the PowerPC target. @@ -23,6 +23,7 @@ include "llvm/Target/Target.td" // CPU Directives // //===----------------------------------------------------------------------===// +def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">; def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">; def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">; def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">; @@ -33,6 +34,7 @@ def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">; def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; +def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -46,6 +48,8 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", "Enable the stfiwx instruction">; +def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", + "Enable Book E instructions">; //===----------------------------------------------------------------------===// // Register File Description @@ -60,6 +64,8 @@ include "PPCInstrInfo.td" // def : Processor<"generic", G3Itineraries, [Directive32]>; +def : Processor<"440", PPC440Itineraries, [Directive440, FeatureBookE]>; +def : Processor<"450", PPC440Itineraries, [Directive440, FeatureBookE]>; def : Processor<"601", G3Itineraries, [Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; def : Processor<"603", G3Itineraries, [Directive603]>; @@ -82,6 +88,10 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureGPUL, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, + FeatureFSqrt, FeatureSTFIWX, + Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 9528459..fb7aa71 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=// +//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===// // // The LLVM Compiler Infrastructure // @@ -20,6 +20,7 @@ #include "PPC.h" #include "PPCTargetMachine.h" #include "PPCSubtarget.h" +#include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCPredicates.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" @@ -39,6 +40,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -49,10 +51,9 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ELF.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSet.h" #include "llvm/ADT/SmallString.h" -#include "InstPrinter/PPCInstPrinter.h" using namespace llvm; namespace { @@ -366,14 +367,21 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::MFCRpseud: + case PPC::MFCR8pseud: // Transform: %R3 = MFCRpseud %CR7 // Into: %R3 = MFCR ;; cr7 OutStreamer.AddComment(PPCInstPrinter:: getRegisterName(MI->getOperand(1).getReg())); - TmpInst.setOpcode(PPC::MFCR); + TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); OutStreamer.EmitInstruction(TmpInst); return; + case PPC::SYNC: + // In Book E sync is called msync, handle this special case here... + if (Subtarget.isBookE()) { + OutStreamer.EmitRawText(StringRef("\tmsync")); + return; + } } LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); @@ -385,14 +393,26 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { return AsmPrinter::EmitFunctionEntryLabel(); // Emit an official procedure descriptor. - // FIXME 64-bit SVR4: Use MCSection here! - OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\"")); - OutStreamer.EmitRawText(StringRef("\t.align 3")); + const MCSection *Current = OutStreamer.getCurrentSection(); + const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd", + ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getReadOnly()); + OutStreamer.SwitchSection(Section); OutStreamer.EmitLabel(CurrentFnSym); - OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) + - ",.TOC.@tocbase"); - OutStreamer.EmitRawText(StringRef("\t.previous")); - OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":"); + OutStreamer.EmitValueToAlignment(8); + MCSymbol *Symbol1 = + OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName())); + MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase")); + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), + Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext), + Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/); + OutStreamer.SwitchSection(Current); + + MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( + ".L." + Twine(CurrentFnSym->getName())); + OutStreamer.EmitLabel(RealFnSym); + CurrentFnSymForSize = RealFnSym; } @@ -402,8 +422,10 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { bool isPPC64 = TD->getPointerSizeInBits() == 64; if (isPPC64 && !TOC.empty()) { - // FIXME 64-bit SVR4: Use MCSection here? - OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\"")); + const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc", + ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getReadOnly()); + OutStreamer.SwitchSection(Section); // FIXME: This is nondeterminstic! for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(), @@ -421,12 +443,14 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { static const char *const CPUDirectives[] = { "", "ppc", + "ppc440", "ppc601", "ppc602", "ppc603", "ppc7400", "ppc750", "ppc970", + "ppcA2", "ppc64" }; @@ -435,7 +459,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { Directive = PPC::DIR_970; if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400) Directive = PPC::DIR_7400; - if (Subtarget.isPPC64() && Directive < PPC::DIR_970) + if (Subtarget.isPPC64() && Directive < PPC::DIR_64) Directive = PPC::DIR_64; assert(Directive <= PPC::DIR_64 && "Directive out of range."); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index 475edf3..5f775e1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -1,4 +1,4 @@ -//===-- PPCBranchSelector.cpp - Emit long conditional branches-----*- C++ -*-=// +//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td index 441db94..9883c2e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -1,10 +1,10 @@ //===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This describes the calling conventions for the PowerPC 32- and 64-bit @@ -130,3 +130,34 @@ def CC_PPC_SVR4_ByVal : CallingConv<[ CCCustom<"CC_PPC_SVR4_Custom_Dummy"> ]>; +def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, + R29, R30, R31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; + +def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE, + R21, R22, R23, R24, R25, R26, R27, R28, + R29, R30, R31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; + +def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20, + X21, X22, X23, X24, X25, X26, X27, X28, + X29, X30, X31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; + +def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE, + X21, X22, X23, X24, X25, X26, X27, X28, + X29, X30, X31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp index 4a1f182..252a2d1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=// +//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===// // // The LLVM Compiler Infrastructure // @@ -50,7 +50,7 @@ namespace { /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. - unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; MachineRelocation GetRelocation(const MachineOperand &MO, @@ -138,7 +138,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const { const MachineOperand &MO = MI.getOperand(OpNo); - assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) && + assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 || + MI.getOpcode() == PPC::MFOCRF) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); return 0x80 >> getPPCRegisterNumbering(MO.getReg()); } @@ -248,7 +249,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, if (MO.isReg()) { // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. // The GPR operand should come through here though. - assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || + assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 && + MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); return getPPCRegisterNumbering(MO.getReg()); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 0b85fea..b77a80b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1,4 +1,4 @@ -//=====- PPCFrameLowering.cpp - PPC Frame Information -----------*- C++ -*-===// +//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -38,7 +38,7 @@ using namespace llvm; /// VRRegNo - Map from a numbered VR register to its enum value. /// -static const unsigned short VRRegNo[] = { +static const uint16_t VRRegNo[] = { PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, @@ -64,7 +64,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) { // epilog blocks. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().getDesc().isReturn()) { + if (!I->empty() && I->back().isReturn()) { bool FoundIt = false; for (MBBI = I->end(); MBBI != I->begin(); ) { --MBBI; @@ -244,8 +244,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { if (MF.getFunction()->hasFnAttr(Attribute::Naked)) return false; - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || - (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects() || + (MF.getTarget().Options.GuaranteedTailCallOpt && + MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } @@ -365,8 +367,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize); BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) - .addReg(PPC::R1) - .addReg(PPC::R1) + .addReg(PPC::R1, RegState::Kill) + .addReg(PPC::R1, RegState::Define) .addReg(PPC::R0); } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1) @@ -380,8 +382,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::R0, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX)) - .addReg(PPC::R1) - .addReg(PPC::R1) + .addReg(PPC::R1, RegState::Kill) + .addReg(PPC::R1, RegState::Define) .addReg(PPC::R0); } } else { // PPC64. @@ -398,8 +400,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X0) .addImm(NegFrameSize); BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) - .addReg(PPC::X1) - .addReg(PPC::X1) + .addReg(PPC::X1, RegState::Kill) + .addReg(PPC::X1, RegState::Define) .addReg(PPC::X0); } else if (isInt<16>(NegFrameSize)) { BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1) @@ -413,8 +415,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X0, RegState::Kill) .addImm(NegFrameSize & 0xFFFF); BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX)) - .addReg(PPC::X1) - .addReg(PPC::X1) + .addReg(PPC::X1, RegState::Kill) + .addReg(PPC::X1, RegState::Define) .addReg(PPC::X0); } } @@ -655,7 +657,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Callee pop calling convention. Pop parameter/linkage area. Used for tail // call optimization - if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR && + if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR && MF.getFunction()->getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); @@ -758,7 +760,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; - if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { + if (MF.getTarget().Options.GuaranteedTailCallOpt && + (TCSPDelta = FI->getTailCallSPDelta()) < 0) { MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } @@ -769,7 +772,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: doesn't detect whether or not we need to spill vXX, which requires // r0 for now. - if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable. + if (RegInfo->requiresRegisterScavenging(MF)) if (needsFP(MF) || spillsCR(MF)) { const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; @@ -863,7 +866,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Take into account stack space reserved for tail calls. int TCSPDelta = 0; - if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { + if (MF.getTarget().Options.GuaranteedTailCallOpt && + (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { LowerBound = TCSPDelta; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h index 20faa71..d708541 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -1,4 +1,4 @@ -//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==// +//===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp index cddc9d8..6ed1fb9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -22,6 +22,30 @@ using namespace llvm; //===----------------------------------------------------------------------===// +// PowerPC Scoreboard Hazard Recognizer +void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (!MCID) + // This is a PPC pseudo-instruction. + return; + + ScoreboardHazardRecognizer::EmitInstruction(SU); +} + +ScheduleHazardRecognizer::HazardType +PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +void PPCScoreboardHazardRecognizer::AdvanceCycle() { + ScoreboardHazardRecognizer::AdvanceCycle(); +} + +void PPCScoreboardHazardRecognizer::Reset() { + ScoreboardHazardRecognizer::Reset(); +} + +//===----------------------------------------------------------------------===// // PowerPC 970 Hazard Recognizer // // This models the dispatch group formation of the PPC970 processor. Dispatch @@ -67,12 +91,6 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, bool &isFirst, bool &isSingle, bool &isCracked, bool &isLoad, bool &isStore) { - if ((int)Opcode >= 0) { - isFirst = isSingle = isCracked = isLoad = isStore = false; - return PPCII::PPC970_Pseudo; - } - Opcode = ~Opcode; - const MCInstrDesc &MCID = TII.get(Opcode); isLoad = MCID.mayLoad(); @@ -89,29 +107,23 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, /// isLoadOfStoredAddress - If we have a load from the previously stored pointer /// as indicated by StorePtr1/StorePtr2/StoreSize, return true. bool PPCHazardRecognizer970:: -isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { +isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, + const Value *LoadValue) const { for (unsigned i = 0, e = NumStores; i != e; ++i) { // Handle exact and commuted addresses. - if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i]) - return true; - if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i]) + if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i]) return true; // Okay, we don't have an exact match, if this is an indexed offset, see if // we have overlap (which happens during fp->int conversion for example). - if (StorePtr2[i] == Ptr2) { - if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i])) - if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) { - // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check - // to see if the load and store actually overlap. - int StoreOffs = StoreOffset->getZExtValue(); - int LoadOffs = LoadOffset->getZExtValue(); - if (StoreOffs < LoadOffs) { - if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true; - } else { - if (int(LoadOffs+LoadSize) > StoreOffs) return true; - } - } + if (StoreValue[i] == LoadValue) { + // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check + // to see if the load and store actually overlap. + if (StoreOffset[i] < LoadOffset) { + if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true; + } else { + if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true; + } } } return false; @@ -125,13 +137,17 @@ ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: getHazardType(SUnit *SU, int Stalls) { assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); - const SDNode *Node = SU->getNode()->getGluedMachineNode(); + MachineInstr *MI = SU->getInstr(); + + if (MI->isDebugValue()) + return NoHazard; + + unsigned Opcode = MI->getOpcode(); bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = - GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, + GetInstrType(Opcode, isFirst, isSingle, isCracked, isLoad, isStore); if (InstrType == PPCII::PPC970_Pseudo) return NoHazard; - unsigned Opcode = Node->getMachineOpcode(); // We can only issue a PPC970_First/PPC970_Single instruction (such as // crand/mtspr/etc) if this is the first cycle of the dispatch group. @@ -168,55 +184,10 @@ getHazardType(SUnit *SU, int Stalls) { // If this is a load following a store, make sure it's not to the same or // overlapping address. - if (isLoad && NumStores) { - unsigned LoadSize; - switch (Opcode) { - default: llvm_unreachable("Unknown load!"); - case PPC::LBZ: case PPC::LBZU: - case PPC::LBZX: - case PPC::LBZ8: case PPC::LBZU8: - case PPC::LBZX8: - case PPC::LVEBX: - LoadSize = 1; - break; - case PPC::LHA: case PPC::LHAU: - case PPC::LHAX: - case PPC::LHZ: case PPC::LHZU: - case PPC::LHZX: - case PPC::LVEHX: - case PPC::LHBRX: - case PPC::LHA8: case PPC::LHAU8: - case PPC::LHAX8: - case PPC::LHZ8: case PPC::LHZU8: - case PPC::LHZX8: - LoadSize = 2; - break; - case PPC::LFS: case PPC::LFSU: - case PPC::LFSX: - case PPC::LWZ: case PPC::LWZU: - case PPC::LWZX: - case PPC::LWA: - case PPC::LWAX: - case PPC::LVEWX: - case PPC::LWBRX: - case PPC::LWZ8: - case PPC::LWZX8: - LoadSize = 4; - break; - case PPC::LFD: case PPC::LFDU: - case PPC::LFDX: - case PPC::LD: case PPC::LDU: - case PPC::LDX: - LoadSize = 8; - break; - case PPC::LVX: - case PPC::LVXL: - LoadSize = 16; - break; - } - - if (isLoadOfStoredAddress(LoadSize, - Node->getOperand(0), Node->getOperand(1))) + if (isLoad && NumStores && !MI->memoperands_empty()) { + MachineMemOperand *MO = *MI->memoperands_begin(); + if (isLoadOfStoredAddress(MO->getSize(), + MO->getOffset(), MO->getValue())) return NoopHazard; } @@ -224,66 +195,27 @@ getHazardType(SUnit *SU, int Stalls) { } void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { - const SDNode *Node = SU->getNode()->getGluedMachineNode(); + MachineInstr *MI = SU->getInstr(); + + if (MI->isDebugValue()) + return; + + unsigned Opcode = MI->getOpcode(); bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = - GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked, + GetInstrType(Opcode, isFirst, isSingle, isCracked, isLoad, isStore); if (InstrType == PPCII::PPC970_Pseudo) return; - unsigned Opcode = Node->getMachineOpcode(); // Update structural hazard information. if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true; // Track the address stored to. - if (isStore) { - unsigned ThisStoreSize; - switch (Opcode) { - default: llvm_unreachable("Unknown store instruction!"); - case PPC::STB: case PPC::STB8: - case PPC::STBU: case PPC::STBU8: - case PPC::STBX: case PPC::STBX8: - case PPC::STVEBX: - ThisStoreSize = 1; - break; - case PPC::STH: case PPC::STH8: - case PPC::STHU: case PPC::STHU8: - case PPC::STHX: case PPC::STHX8: - case PPC::STVEHX: - case PPC::STHBRX: - ThisStoreSize = 2; - break; - case PPC::STFS: - case PPC::STFSU: - case PPC::STFSX: - case PPC::STWX: case PPC::STWX8: - case PPC::STWUX: - case PPC::STW: case PPC::STW8: - case PPC::STWU: - case PPC::STVEWX: - case PPC::STFIWX: - case PPC::STWBRX: - ThisStoreSize = 4; - break; - case PPC::STD_32: - case PPC::STDX_32: - case PPC::STD: - case PPC::STDU: - case PPC::STFD: - case PPC::STFDX: - case PPC::STDX: - case PPC::STDUX: - ThisStoreSize = 8; - break; - case PPC::STVX: - case PPC::STVXL: - ThisStoreSize = 16; - break; - } - - StoreSize[NumStores] = ThisStoreSize; - StorePtr1[NumStores] = Node->getOperand(1); - StorePtr2[NumStores] = Node->getOperand(2); + if (isStore && NumStores < 4 && !MI->memoperands_empty()) { + MachineMemOperand *MO = *MI->memoperands_begin(); + StoreSize[NumStores] = MO->getSize(); + StoreOffset[NumStores] = MO->getOffset(); + StoreValue[NumStores] = MO->getValue(); ++NumStores; } @@ -306,3 +238,8 @@ void PPCHazardRecognizer970::AdvanceCycle() { if (NumIssued == 5) EndDispatchGroup(); } + +void PPCHazardRecognizer970::Reset() { + EndDispatchGroup(); +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h index 2f81f0f..55b45d0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -14,12 +14,28 @@ #ifndef PPCHAZRECS_H #define PPCHAZRECS_H +#include "PPCInstrInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "PPCInstrInfo.h" namespace llvm { +/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based +/// hazard recognizer for generic PPC processors. +class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer { + const ScheduleDAG *DAG; +public: + PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData, + const ScheduleDAG *DAG_) : + ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {} + + virtual HazardType getHazardType(SUnit *SU, int Stalls); + virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + virtual void Reset(); +}; + /// PPCHazardRecognizer970 - This class defines a finite state automata that /// models the dispatch logic on the PowerPC 970 (aka G5) processor. This /// promotes good dispatch group formation and implements noop insertion to @@ -42,8 +58,9 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { // // This is null if we haven't seen a store yet. We keep track of both // operands of the store here, since we support [r+r] and [r+i] addressing. - SDValue StorePtr1[4], StorePtr2[4]; - unsigned StoreSize[4]; + const Value *StoreValue[4]; + int64_t StoreOffset[4]; + uint64_t StoreSize[4]; unsigned NumStores; public: @@ -51,6 +68,7 @@ public: virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); + virtual void Reset(); private: /// EndDispatchGroup - Called when we are finishing a new dispatch group. @@ -63,8 +81,8 @@ private: bool &isFirst, bool &isSingle,bool &isCracked, bool &isLoad, bool &isStore); - bool isLoadOfStoredAddress(unsigned LoadSize, - SDValue Ptr1, SDValue Ptr2) const; + bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset, + const Value *LoadValue) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6f204cc..5a04888 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -18,7 +18,6 @@ #include "MCTargetDesc/PPCPredicates.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" @@ -211,13 +210,13 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Find all return blocks, outputting a restore in each epilog. for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - if (!BB->empty() && BB->back().getDesc().isReturn()) { + if (!BB->empty() && BB->back().isReturn()) { IP = BB->end(); --IP; // Skip over all terminator instructions, which are part of the return // sequence. MachineBasicBlock::iterator I2 = IP; - while (I2 != BB->begin() && (--I2)->getDesc().isTerminator()) + while (I2 != BB->begin() && (--I2)->isTerminator()) IP = I2; // Emit: MTVRSAVE InVRSave @@ -378,8 +377,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { DebugLoc dl = N->getDebugLoc(); APInt LKZ, LKO, RKZ, RKO; - CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO); - CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO); + CurDAG->ComputeMaskedBits(Op0, LKZ, LKO); + CurDAG->ComputeMaskedBits(Op1, RKZ, RKO); unsigned TargetMask = LKZ.getZExtValue(); unsigned InsertMask = RKZ.getZExtValue(); @@ -603,7 +602,6 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { case ISD::SETULT: return 0; case ISD::SETUGT: return 1; } - return 0; } SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { @@ -1067,7 +1065,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Target = N->getOperand(1); unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; - Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target, + Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, Chain), 0); return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d6b8a9e..3b24951 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16,26 +16,24 @@ #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" #include "MCTargetDesc/PPCPredicates.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DerivedTypes.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, @@ -104,6 +102,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); + // We do not currently implment this libm ops for PowerPC. + setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); + setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); + setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); + setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); + // PowerPC has no SREM/UREM instructions setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); @@ -147,9 +152,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BSWAP, MVT::i32 , Expand); setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i64 , Expand); setOperationAction(ISD::CTPOP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); @@ -217,11 +226,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - // VAARG is custom lowered with the 32-bit SVR4 ABI. - if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI() - && !TM.getSubtarget<PPCSubtarget>().isPPC64()) { - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::i64, Custom); + if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { + if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + // VAARG always uses double-word chunks, so promote anything smaller. + setOperationAction(ISD::VAARG, MVT::i1, Promote); + AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i8, Promote); + AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i16, Promote); + AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i32, Promote); + AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + } else { + // VAARG is custom lowered with the 32-bit SVR4 ABI. + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::i64, Custom); + } } else setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -333,7 +354,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); } // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle @@ -366,6 +389,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } + if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) + setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); @@ -408,6 +434,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setInsertFencesForAtomic(true); + setSchedulingPreference(Sched::Hybrid); + computeRegisterProperties(); } @@ -418,7 +446,16 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { // Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) return 4; - // FIXME SVR4 TBD + + // 16byte and wider vectors are passed on 16byte boundary. + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) + if (VTy->getBitWidth() >= 128) + return 16; + + // The rest is 8 on PPC64 and 4 on PPC32 boundary. + if (PPCSubTarget.isPPC64()) + return 8; + return 4; } @@ -447,6 +484,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; case PPCISD::STD_32: return "PPCISD::STD_32"; case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; + case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4"; case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; case PPCISD::NOP: return "PPCISD::NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; @@ -822,14 +860,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, APInt LHSKnownZero, LHSKnownOne; APInt RHSKnownZero, RHSKnownOne; DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), LHSKnownZero, LHSKnownOne); if (LHSKnownZero.getBoolValue()) { DAG.ComputeMaskedBits(N.getOperand(1), - APInt::getAllOnesValue(N.getOperand(1) - .getValueSizeInBits()), RHSKnownZero, RHSKnownOne); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. @@ -884,10 +918,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), - LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't @@ -1000,10 +1031,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), - LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. @@ -1223,7 +1251,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // extra load to get the address of the global. if (MOHiFlag & PPCII::MO_NLP_FLAG) Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return Ptr; } @@ -1319,11 +1347,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, // areas SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, + false, 0); InChain = OverflowArea.getValue(1); SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, + false, 0); InChain = RegSaveArea.getValue(1); // select overflow_area if index > 8 @@ -1372,7 +1402,8 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, MachinePointerInfo(), MVT::i32, false, false, 0); - return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0); + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), + false, false, false, 0); } SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, @@ -1411,8 +1442,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, false, - /*isReturnValueUsed=*/true, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); @@ -1530,7 +1562,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; @@ -1557,7 +1589,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -1581,8 +1613,8 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, /// GetFPR - Get the set of FP registers that should be allocated for arguments, /// on Darwin. -static const unsigned *GetFPR() { - static const unsigned FPR[] = { +static const uint16_t *GetFPR() { + static const uint16_t FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 }; @@ -1663,7 +1695,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Potential tail calls could cause overwriting of argument stack slots. - bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); + bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && + (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 4; // Assign locations to all of the incoming arguments. @@ -1681,7 +1714,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Arguments stored in registers. if (VA.isRegLoc()) { - TargetRegisterClass *RC; + const TargetRegisterClass *RC; EVT ValVT = VA.getValVT(); switch (ValVT.getSimpleVT().SimpleTy) { @@ -1721,7 +1754,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo(), - false, false, 0)); + false, false, false, 0)); } } @@ -1762,13 +1795,13 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - static const unsigned GPArgRegs[] = { + static const uint16_t GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); - static const unsigned FPArgRegs[] = { + static const uint16_t FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -1853,25 +1886,26 @@ PPCTargetLowering::LowerFormalArguments_Darwin( EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. - bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast)); + bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && + (CallConv == CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; - static const unsigned GPR_32[] = { // 32-bit registers. + static const uint16_t GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const unsigned GPR_64[] = { // 64-bit registers. + static const uint16_t GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const unsigned *FPR = GetFPR(); + static const uint16_t *FPR = GetFPR(); - static const unsigned VR[] = { + static const uint16_t VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -1882,7 +1916,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; + const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have @@ -1896,12 +1930,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin( for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { EVT ObjectVT = Ins[ArgNo].VT; - unsigned ObjSize = ObjectVT.getSizeInBits()/8; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of regs. - ObjSize = Flags.getByValSize(); + unsigned ObjSize = Flags.getByValSize(); unsigned ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; VecArgOffset += ArgSize; @@ -2138,7 +2171,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } InVals.push_back(ArgVal); @@ -2259,9 +2292,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); // Tail call needs the stack to be aligned. - if (CC==CallingConv::Fast && GuaranteedTailCallOpt) { - unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> - getStackAlignment(); + if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ + unsigned TargetAlign = DAG.getMachineFunction().getTarget(). + getFrameLowering()->getStackAlignment(); unsigned AlignMask = TargetAlign-1; NumBytes = (NumBytes + AlignMask) & ~AlignMask; } @@ -2295,7 +2328,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { - if (!GuaranteedTailCallOpt) + if (!getTargetMachine().Options.GuaranteedTailCallOpt) return false; // Variable argument functions are not supported. @@ -2443,7 +2476,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack @@ -2451,7 +2484,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, if (isDarwinABI) { FPOpOut = getFramePointerFrameIndex(DAG); FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); Chain = SDValue(FPOpOut.getNode(), 1); } } @@ -2748,7 +2781,14 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. int BytesCalleePops = - (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0; + (CallConv == CallingConv::Fast && + getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); @@ -2776,9 +2816,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); } - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - // Add a NOP immediately after the branch instruction when using the 64-bit // SVR4 ABI. At link time, if caller and callee are in a different module and // thus have a different TOC, the call will be replaced with a call to a stub @@ -2787,8 +2824,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // which restores the TOC of the caller from the TOC save slot of the current // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. + + bool needsTOCRestore = false; if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); if (CallOpc == PPCISD::BCTRL_SVR4) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. @@ -2799,14 +2837,22 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // since r2 is a reserved register (which prevents the register allocator // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. - Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); - InFlag = Chain.getValue(1); - } else { + needsTOCRestore = true; + } else if (CallOpc == PPCISD::CALL_SVR4) { // Otherwise insert NOP. - InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag); + CallOpc = PPCISD::CALL_NOP_SVR4; } } + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + if (needsTOCRestore) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); + InFlag = Chain.getValue(1); + } + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(BytesCalleePops, true), InFlag); @@ -2820,7 +2866,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, SDValue PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -2864,7 +2910,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage @@ -3071,7 +3118,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast) + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); unsigned nAltivecParamsAtEnd = 0; @@ -3120,17 +3168,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - static const unsigned GPR_32[] = { // 32-bit registers. + static const uint16_t GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const unsigned GPR_64[] = { // 64-bit registers. + static const uint16_t GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const unsigned *FPR = GetFPR(); + static const uint16_t *FPR = GetFPR(); - static const unsigned VR[] = { + static const uint16_t VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -3138,7 +3186,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); - const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; + const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<TailCallArgumentInfo, 8> TailCallArguments; @@ -3212,7 +3260,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, if (GPR_idx != NumGPRs) { SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; @@ -3250,7 +3298,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Float varargs are always shadowed in available integer registers if (GPR_idx != NumGPRs) { SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, + false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3259,7 +3308,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3308,7 +3357,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, if (VR_idx != NumVRs) { SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } @@ -3319,7 +3368,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, PtrVT)); SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3483,7 +3532,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, // Load the old link SP. SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); @@ -3674,7 +3723,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, @@ -3718,7 +3767,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, Ops, 4, MVT::i64, MMO); // Load the value as a double. SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // FCFID it and return it. SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); @@ -3770,7 +3819,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue Four = DAG.getConstant(4, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Transform as necessary SDValue CWD1 = @@ -4236,8 +4285,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Check to see if this is a shuffle of 4-byte values. If so, we can use our // perfect shuffle table to emit an optimal matching sequence. - SmallVector<int, 16> PermMask; - SVOp->getMask(PermMask); + ArrayRef<int> PermMask = SVOp->getMask(); unsigned PFIndexes[4]; bool isFourElementShuffle = true; @@ -4441,7 +4489,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, false, false, 0); // Load it out. return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { @@ -4549,7 +4597,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); } - return SDValue(); } void PPCTargetLowering::ReplaceNodeResults(SDNode *N, @@ -4559,8 +4606,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: - assert(false && "Do not know how to custom type legalize this operation!"); - return; + llvm_unreachable("Do not know how to custom type legalize this operation!"); case ISD::VAARG: { if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() || TM.getSubtarget<PPCSubtarget>().isPPC64()) @@ -5461,12 +5507,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, //===----------------------------------------------------------------------===// void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { @@ -5700,7 +5745,7 @@ bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{ return (V > -(1 << 16) && V < (1 << 16)-1); } -bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { +bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { return false; } @@ -5729,13 +5774,13 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); } // Just load the return address off the stack. SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, MachinePointerInfo(), false, false, 0); + RetAddrFI, MachinePointerInfo(), false, false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, @@ -5749,7 +5794,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); - bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && + bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects()) && MFI->getStackSize() && !MF.getFunction()->hasFnAttr(Attribute::Naked); unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : @@ -5758,7 +5804,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, PtrVT); while (Depth--) FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), - FrameAddr, MachinePointerInfo(), false, false, 0); + FrameAddr, MachinePointerInfo(), false, false, + false, 0); return FrameAddr; } @@ -5774,7 +5821,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If -/// 'NonScalarIntSafe' is true, that means it's safe to return a +/// 'IsZeroVal' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -5782,7 +5829,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { @@ -5791,3 +5838,12 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::i32; } } + +Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { + unsigned Directive = PPCSubTarget.getDarwinDirective(); + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) + return Sched::ILP; + + return TargetLowering::getSchedulingPreference(N); +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index 430e45e..18eb072 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -15,10 +15,10 @@ #ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H -#include "llvm/Target/TargetLowering.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "PPC.h" #include "PPCSubtarget.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/SelectionDAG.h" namespace llvm { namespace PPCISD { @@ -95,7 +95,9 @@ namespace llvm { EXTSW_32, /// CALL - A direct function call. - CALL_Darwin, CALL_SVR4, + /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit + /// SVR4 calls. + CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4, /// NOP - Special NOP which follows 64-bit SVR4 calls. NOP, @@ -279,6 +281,7 @@ namespace llvm { bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const; + Sched::Preference getSchedulingPreference(SDNode *N) const; /// LowerOperation - Provide custom lowering hooks for some operations. /// @@ -293,7 +296,6 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -353,7 +355,7 @@ namespace llvm { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If - /// 'NonScalarIntSafe' is true, that means it's safe to return a + /// 'IsZeroVal' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -361,7 +363,7 @@ namespace llvm { /// target-independent logic. virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, bool MemcpyStrSrc, + bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const; private: @@ -437,8 +439,8 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, + LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -472,21 +474,21 @@ namespace llvm { SmallVectorImpl<SDValue> &InVals) const; SDValue - LowerCall_Darwin(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue - LowerCall_SVR4(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool isTailCall, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, + bool isVarArg, bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; }; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index e88ad37..7f67a41 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1,10 +1,10 @@ -//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===// -// +//===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the PowerPC 64-bit instructions. These patterns are used @@ -64,13 +64,7 @@ let Defs = [LR8] in PPC970_Unit_BRU; // Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the PPC64 non-callee saved registers. - Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR8,CTR8, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8_Darwin : IForm<18, 0, 1, @@ -90,23 +84,29 @@ let isCall = 1, PPC970_Unit = 7, // ELF 64 ABI Calls = Darwin ABI Calls // Used to define BL8_ELF and BLA8_ELF -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the PPC64 non-callee saved registers. - Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR8,CTR8, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8_ELF : IForm<18, 0, 1, (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. + "bl $func", BrB, []>; // See Pat patterns below. + + let isCodeGenOnly = 1 in + def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, variable_ops), + "bl $func\n\tnop", BrB, []>; + def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; + + let isCodeGenOnly = 1 in + def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24, + (outs), (ins aaddr:$func, variable_ops), + "bla $func\n\tnop", BrB, + [(PPCcall_nop_SVR4 (i64 imm:$func))]>; } - let Uses = [CTR8, RM] in { + let Uses = [X11, CTR8, RM] in { def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins variable_ops), "bctrl", BrB, @@ -123,8 +123,14 @@ def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), (BL8_ELF tglobaladdr:$dst)>; +def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)), + (BL8_NOP_ELF tglobaladdr:$dst)>; + def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; +def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)), + (BL8_NOP_ELF texternalsym:$dst)>; + def : Pat<(PPCnop), (NOP)>; @@ -223,6 +229,18 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; +// 64-but CR instructions +def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS), + "mtcrf $FXM, $rS", BrMCRX>, + PPC970_MicroCode, PPC970_Unit_CRU; + +def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM), + "", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; + +def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), + "mfcr $rT", SprMFCR>, + PPC970_MicroCode, PPC970_Unit_CRU; //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. @@ -469,6 +487,12 @@ def RLDICR : MDForm_1<30, 1, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME), "rldicr $rA, $rS, $SH, $ME", IntRotateD, []>, isPPC64; + +def RLWINM8 : MForm_2<21, + (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME), + "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral, + []>; + } // End FXU Operations. @@ -500,7 +524,7 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStGeneral, + "lhau $rD, $disp($rA)", LdStLoad, []>, RegConstraint<"$rA = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! @@ -510,38 +534,38 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), - "lbz $rD, $src", LdStGeneral, + "lbz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>; def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), - "lhz $rD, $src", LdStGeneral, + "lhz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>; def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), - "lwz $rD, $src", LdStGeneral, + "lwz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStGeneral, + "lbzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>; def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStGeneral, + "lhzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>; def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStGeneral, + "lwzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. let mayLoad = 1 in { def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStGeneral, + "lbzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStGeneral, + "lhzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStGeneral, + "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; } @@ -557,7 +581,8 @@ def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "", [(set G8RC:$rD, (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; - + +let hasSideEffects = 1 in { let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg), "ld 2, 8($reg)", LdStLD, @@ -567,6 +592,7 @@ let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo. def LDtoc_restore : DSForm_1<58, 0, (outs), (ins), "ld 2, 40(1)", LdStLD, [(PPCtoc_restore)]>, isPPC64; +} def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; @@ -587,24 +613,24 @@ def : Pat<(PPCload xaddr:$src), let PPC970_Unit = 2 in { // Truncating stores. def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), - "stb $rS, $src", LdStGeneral, + "stb $rS, $src", LdStStore, [(truncstorei8 G8RC:$rS, iaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), - "sth $rS, $src", LdStGeneral, + "sth $rS, $src", LdStStore, [(truncstorei16 G8RC:$rS, iaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), - "stw $rS, $src", LdStGeneral, + "stw $rS, $src", LdStStore, [(truncstorei32 G8RC:$rS, iaddr:$src)]>; def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), - "stbx $rS, $dst", LdStGeneral, + "stbx $rS, $dst", LdStStore, [(truncstorei8 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), - "sthx $rS, $dst", LdStGeneral, + "sthx $rS, $dst", LdStStore, [(truncstorei16 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), - "stwx $rS, $dst", LdStGeneral, + "stwx $rS, $dst", LdStStore, [(truncstorei32 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; // Normal 8-byte stores. @@ -621,14 +647,14 @@ let PPC970_Unit = 2 in { def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + "stbu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + "sthu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 256370f..6c0f3d3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -1,10 +1,10 @@ -//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===// -// +//===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the Altivec extension to the PowerPC instruction set. @@ -188,85 +188,85 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), - "dss $STRM", LdStGeneral /*FIXME*/, []>; + "dss $STRM", LdStLoad /*FIXME*/, []>; def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), - "dssall", LdStGeneral /*FIXME*/, []>; + "dssall", LdStLoad /*FIXME*/, []>; def DST : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DST64 : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT64 : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST64 : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), - "mfvscr $vD", LdStGeneral, + "mfvscr $vD", LdStStore, [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), - "mtvscr $vB", LdStGeneral, + "mtvscr $vB", LdStLoad, [(int_ppc_altivec_mtvscr VRRC:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), - "lvebx $vD, $src", LdStGeneral, + "lvebx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), - "lvehx $vD, $src", LdStGeneral, + "lvehx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), - "lvewx $vD, $src", LdStGeneral, + "lvewx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), - "lvx $vD, $src", LdStGeneral, + "lvx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), - "lvxl $vD, $src", LdStGeneral, + "lvxl $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), - "lvsl $vD, $src", LdStGeneral, + "lvsl $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), - "lvsr $vD, $src", LdStGeneral, + "lvsr $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), - "stvebx $rS, $dst", LdStGeneral, + "stvebx $rS, $dst", LdStStore, [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>; def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), - "stvehx $rS, $dst", LdStGeneral, + "stvehx $rS, $dst", LdStStore, [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>; def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), - "stvewx $rS, $dst", LdStGeneral, + "stvewx $rS, $dst", LdStStore, [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>; def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), - "stvx $rS, $dst", LdStGeneral, + "stvx $rS, $dst", LdStStore, [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>; def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), - "stvxl $rS, $dst", LdStGeneral, + "stvxl $rS, $dst", LdStStore, [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 84a15b1..d8e4b2b 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -1,10 +1,10 @@ //===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -51,6 +51,36 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; } class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } +// Two joined instructions; used to emit two adjacent instructions as one. +// The itinerary from the first instruction is used for scheduling and +// classification. +class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : Instruction { + field bits<64> Inst; + + bit PPC64 = 0; // Default value, override with isPPC64 + + let Namespace = "PPC"; + let Inst{0-5} = opcode1; + let Inst{32-37} = opcode2; + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Itinerary = itin; + + bits<1> PPC970_First = 0; + bits<1> PPC970_Single = 0; + bits<1> PPC970_Cracked = 0; + bits<3> PPC970_Unit = 0; + + /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. + let TSFlags{0} = PPC970_First; + let TSFlags{1} = PPC970_Single; + let TSFlags{2} = PPC970_Cracked; + let TSFlags{5-3} = PPC970_Unit; +} // 1.7.1 I-Form class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, @@ -164,6 +194,35 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr, let Addr = 0; } +class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<21> Addr; + + let Pattern = pattern; + bits<24> LI; + + let Inst{6-29} = LI; + let Inst{30} = aa; + let Inst{31} = lk; + + let Inst{38-42} = A; + let Inst{43-47} = Addr{20-16}; // Base Reg + let Inst{48-63} = Addr{15-0}; // Displacement +} + +// This is used to emit BL8+NOP. +class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : IForm_and_DForm_1<opcode1, aa, lk, opcode2, + OOL, IOL, asmstr, itin, pattern> { + let A = 0; + let Addr = 0; +} + class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 2bc109c..b45ada9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===// +//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -34,8 +34,8 @@ #include "PPCGenInstrInfo.inc" namespace llvm { -extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. -extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. +extern cl::opt<bool> DisablePPC32RS; +extern cl::opt<bool> DisablePPC64RS; } using namespace llvm; @@ -49,13 +49,32 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { - // Should use subtarget info to pick the right hazard recognizer. For - // now, always return a PPC970 recognizer. - const TargetInstrInfo *TII = TM->getInstrInfo(); - assert(TII && "No InstrInfo?"); - return new PPCHazardRecognizer970(*TII); + unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) { + const InstrItineraryData *II = TM->getInstrItineraryData(); + return new PPCScoreboardHazardRecognizer(II, DAG); + } + + return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); } +/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer +/// to use for this target when scheduling the DAG. +ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( + const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); + + // Most subtargets use a PPC970 recognizer. + if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) { + const TargetInstrInfo *TII = TM.getInstrInfo(); + assert(TII && "No InstrInfo?"); + + return new PPCHazardRecognizer970(*TII); + } + + return new PPCScoreboardHazardRecognizer(II, DAG); +} unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { @@ -327,6 +346,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } +// This function returns true if a CR spill is necessary and false otherwise. bool PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, @@ -358,7 +378,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); } else { // FIXME: this spills LR immediately to memory in one step. To do this, - // we use R11, which we know cannot be used in the prolog/epilog. This is + // we use X11, which we know cannot be used in the prolog/epilog. This is // a hack. NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11)); NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) @@ -377,9 +397,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { - if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - // FIXME (64-bit): Enable + if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || + (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) .addReg(SrcReg, getKillRegState(isKill)), @@ -392,11 +411,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // We hack this on Darwin by reserving R2. It's probably broken on Linux // at the moment. + bool is64Bit = TM.getSubtargetImpl()->isPPC64(); // We need to store the CR in the low 4-bits of the saved value. First, // issue a MFCR to save all of the CRBits. unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg) + (is64Bit ? PPC::X2 : PPC::R2) : + (is64Bit ? PPC::X0 : PPC::R0); + NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud : + PPC::MFCRpseud), ScratchReg) .addReg(SrcReg, getKillRegState(isKill))); // If the saved register wasn't CR0, shift the bits left so that they are @@ -404,12 +426,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, if (SrcReg != PPC::CR0) { unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; // rlwinm scratch, scratch, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 : + PPC::RLWINM), ScratchReg) .addReg(ScratchReg).addImm(ShiftBits) .addImm(0).addImm(31)); } - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ? + PPC::STW8 : PPC::STW)) .addReg(ScratchReg, getKillRegState(isKill)), FrameIdx)); @@ -486,15 +510,14 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); } -void +bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, @@ -514,8 +537,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, FrameIdx)); } else { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), - PPC::R11), FrameIdx)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11)); + PPC::X11), FrameIdx)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11)); } } else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), @@ -524,28 +547,37 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - ScratchReg), FrameIdx)); - - // If the reloaded register isn't CR0, shift the bits right so that they are - // in the right CR's slot. - if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; - // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) - .addImm(31)); + if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || + (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CR), DestReg) + , FrameIdx)); + return true; + } else { + // FIXME: We need a scatch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + ScratchReg), FrameIdx)); + + // If the reloaded register isn't CR0, shift the bits right so that they are + // in the right CR's slot. + if (DestReg != PPC::CR0) { + unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; + // rlwinm r11, r11, 32-ShiftBits, 0, 31. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) + .addImm(31)); + } + + NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ? + PPC::MTCRF8 : PPC::MTCRF), DestReg) + .addReg(ScratchReg)); } - - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) - .addReg(ScratchReg)); } else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) { unsigned Reg = 0; @@ -590,6 +622,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, } else { llvm_unreachable("Unknown regclass!"); } + + return false; } void @@ -602,14 +636,16 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); - LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs); + if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) { + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setSpillsCR(); + } for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); const MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); @@ -649,6 +685,9 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::GC_LABEL: case PPC::DBG_VALUE: return 0; + case PPC::BL8_NOP_ELF: + case PPC::BLA8_NOP_ELF: + return 8; default: return 4; // PowerPC instructions are all 4 bytes } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 90bacc9..7d49aa1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -1,4 +1,4 @@ -//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===// +//===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#ifndef POWERPC32_INSTRUCTIONINFO_H -#define POWERPC32_INSTRUCTIONINFO_H +#ifndef POWERPC_INSTRUCTIONINFO_H +#define POWERPC_INSTRUCTIONINFO_H #include "PPC.h" -#include "llvm/Target/TargetInstrInfo.h" #include "PPCRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "PPCGenInstrInfo.inc" @@ -72,7 +72,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; - void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, + bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl<MachineInstr*> &NewMIs) const; @@ -88,6 +88,9 @@ public: ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetMachine *TM, const ScheduleDAG *DAG) const; + ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index f248b5b..748486c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1,10 +1,10 @@ -//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===// -// +//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the subset of the 32-bit PowerPC instruction set, as used @@ -116,6 +116,9 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; @@ -349,10 +352,10 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def FPContractions : Predicate<"!NoExcessFPPrecision">; +def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; - +def IsBookE : Predicate<"PPCSubTarget.isBookE()">; //===----------------------------------------------------------------------===// // PowerPC Instruction Definitions. @@ -399,7 +402,14 @@ let usesCustomInserter = 1, // Expanded after instruction selection. // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to // scavenge a register for it. -def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F), +let mayStore = 1 in +def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F), + "", []>; + +// RESTORE_CR - Indicate that we're restoring the CR register (previously +// spilled), so we'll need to scavenge a register for it. +let mayLoad = 1 in +def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F), "", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { @@ -431,13 +441,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { } // Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the non-callee saved registers... - Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR,CTR, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_Darwin : IForm<18, 0, 1, @@ -456,13 +460,7 @@ let isCall = 1, PPC970_Unit = 7, } // SVR4 ABI Calls. -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the non-callee saved registers... - Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR,CTR, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_SVR4 : IForm<18, 0, 1, @@ -547,6 +545,9 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, PPC970_DGroup_Single; +def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), + (DCBT xoaddr:$dst)>; + // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { @@ -642,7 +643,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst), isDOT; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in -def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; +def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; //===----------------------------------------------------------------------===// // PPC32 Load Instructions. @@ -651,17 +652,17 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; // Unindexed (r+i) Loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src), - "lbz $rD, $src", LdStGeneral, + "lbz $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src), - "lhz $rD, $src", LdStGeneral, + "lhz $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), - "lwz $rD, $src", LdStGeneral, + "lwz $rD, $src", LdStLoad, [(set GPRC:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), @@ -675,22 +676,22 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). let mayLoad = 1 in { def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStGeneral, + "lbzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStGeneral, + "lhau $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStGeneral, + "lhzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStGeneral, + "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -710,25 +711,25 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), // let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStGeneral, + "lbzx $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStGeneral, + "lhzx $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStGeneral, + "lwzx $rD, $src", LdStLoad, [(set GPRC:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), - "lhbrx $rD, $src", LdStGeneral, + "lhbrx $rD, $src", LdStLoad, [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), - "lwbrx $rD, $src", LdStGeneral, + "lwbrx $rD, $src", LdStLoad, [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), @@ -746,13 +747,13 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), // Unindexed (r+i) Stores. let PPC970_Unit = 2 in { def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src), - "stb $rS, $src", LdStGeneral, + "stb $rS, $src", LdStStore, [(truncstorei8 GPRC:$rS, iaddr:$src)]>; def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src), - "sth $rS, $src", LdStGeneral, + "sth $rS, $src", LdStStore, [(truncstorei16 GPRC:$rS, iaddr:$src)]>; def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), - "stw $rS, $src", LdStGeneral, + "stw $rS, $src", LdStStore, [(store GPRC:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), "stfs $rS, $dst", LdStUX, @@ -766,33 +767,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), let PPC970_Unit = 2 in { def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + "stbu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + "sthu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStGeneral, + "stwu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStGeneral, + "stfsu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStGeneral, + "stfdu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; @@ -803,29 +804,29 @@ def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, // let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), - "stbx $rS, $dst", LdStGeneral, + "stbx $rS, $dst", LdStStore, [(truncstorei8 GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst), - "sthx $rS, $dst", LdStGeneral, + "sthx $rS, $dst", LdStStore, [(truncstorei16 GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), - "stwx $rS, $dst", LdStGeneral, + "stwx $rS, $dst", LdStStore, [(store GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; let mayStore = 1 in { def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB), - "stwux $rS, $rA, $rB", LdStGeneral, + "stwux $rS, $rA, $rB", LdStStore, []>; } def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), - "sthbrx $rS, $dst", LdStGeneral, + "sthbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), - "stwbrx $rS, $dst", LdStGeneral, + "stwbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; @@ -1091,7 +1092,7 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; -def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS), +def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp index 4590f00..a6528c0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp @@ -291,9 +291,10 @@ void PPC64CompilationCallback() { } #endif -extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, - unsigned *OrigCallAddrPlus4, - bool is64Bit) { +extern "C" { +static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub // emitted by emitFunctionStub, rather than the instruction after it. unsigned *StubCallAddr = StubCallAddrPlus4 - 1; @@ -337,6 +338,7 @@ extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, // stack after we restore all regs. return Target; } +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h index 47ead59..2f8243a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h @@ -1,4 +1,4 @@ -//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===// +//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 33af426..276edcb 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -140,7 +140,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, switch (MO.getType()) { default: MI->dump(); - assert(0 && "unknown operand type"); + llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: assert(!MO.getSubReg() && "Subregs should be eliminated!"); MCOp = MCOperand::CreateReg(MO.getReg()); @@ -166,6 +166,8 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP, isDarwin); break; + case MachineOperand::MO_RegisterMask: + continue; } OutMI.addOperand(MCOp); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp new file mode 100644 index 0000000..6a0aec8 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -0,0 +1,15 @@ +//===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPCMachineFunctionInfo.h" + +using namespace llvm; + +void PPCFunctionInfo::anchor() { } + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index e2649c8..24caffa 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -21,7 +21,8 @@ namespace llvm { /// PPCFunctionInfo - This class is derived from MachineFunction private /// PowerPC target-specific information for each MachineFunction. class PPCFunctionInfo : public MachineFunctionInfo { -private: + virtual void anchor(); + /// FramePointerSaveIndex - Frame index of where the old frame pointer is /// stored. Also used as an anchor for instructions that need to be altered /// when using frame pointers (dyna_add, dyna_sub.) diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h index 3164e33..17b836d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h @@ -1,4 +1,4 @@ -//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===// +//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 2e90b7a..ef13571 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===// +//===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "reginfo" +#include "PPCRegisterInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" -#include "PPCRegisterInfo.h" #include "PPCFrameLowering.h" #include "PPCSubtarget.h" #include "llvm/CallingConv.h" @@ -46,15 +46,14 @@ #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -// FIXME (64-bit): Eventually enable by default. namespace llvm { -cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger", +cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger", cl::init(false), - cl::desc("Enable PPC32 register scavenger"), + cl::desc("Disable PPC32 register scavenger"), cl::Hidden); -cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger", +cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger", cl::init(false), - cl::desc("Enable PPC64 register scavenger"), + cl::desc("Disable PPC64 register scavenger"), cl::Hidden); } @@ -63,8 +62,8 @@ using namespace llvm; // FIXME (64-bit): Should be inlined. bool PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return ((EnablePPC32RS && !Subtarget.isPPC64()) || - (EnablePPC64RS && Subtarget.isPPC64())); + return ((!DisablePPC32RS && !Subtarget.isPPC64()) || + (!DisablePPC64RS && Subtarget.isPPC64())); } PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, @@ -99,122 +98,22 @@ PPCRegisterInfo::getPointerRegClass(unsigned Kind) const { return &PPC::GPRCRegClass; } -const unsigned* +const uint16_t* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - // 32-bit Darwin calling convention. - static const unsigned Darwin32_CalleeSavedRegs[] = { - PPC::R13, PPC::R14, PPC::R15, - PPC::R16, PPC::R17, PPC::R18, PPC::R19, - PPC::R20, PPC::R21, PPC::R22, PPC::R23, - PPC::R24, PPC::R25, PPC::R26, PPC::R27, - PPC::R28, PPC::R29, PPC::R30, PPC::R31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, - - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - - PPC::LR, 0 - }; - - // 32-bit SVR4 calling convention. - static const unsigned SVR4_CalleeSavedRegs[] = { - PPC::R14, PPC::R15, - PPC::R16, PPC::R17, PPC::R18, PPC::R19, - PPC::R20, PPC::R21, PPC::R22, PPC::R23, - PPC::R24, PPC::R25, PPC::R26, PPC::R27, - PPC::R28, PPC::R29, PPC::R30, PPC::R31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - - PPC::VRSAVE, - - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, - - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - - 0 - }; - // 64-bit Darwin calling convention. - static const unsigned Darwin64_CalleeSavedRegs[] = { - PPC::X14, PPC::X15, - PPC::X16, PPC::X17, PPC::X18, PPC::X19, - PPC::X20, PPC::X21, PPC::X22, PPC::X23, - PPC::X24, PPC::X25, PPC::X26, PPC::X27, - PPC::X28, PPC::X29, PPC::X30, PPC::X31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, - - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - - PPC::LR8, 0 - }; - - // 64-bit SVR4 calling convention. - static const unsigned SVR4_64_CalleeSavedRegs[] = { - PPC::X14, PPC::X15, - PPC::X16, PPC::X17, PPC::X18, PPC::X19, - PPC::X20, PPC::X21, PPC::X22, PPC::X23, - PPC::X24, PPC::X25, PPC::X26, PPC::X27, - PPC::X28, PPC::X29, PPC::X30, PPC::X31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - - PPC::VRSAVE, - - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, + if (Subtarget.isDarwinABI()) + return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : + CSR_Darwin32_SaveList; - PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, - PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, - PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList; +} - 0 - }; - +const unsigned* +PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs : - Darwin32_CalleeSavedRegs; + return Subtarget.isPPC64() ? CSR_Darwin64_RegMask : + CSR_Darwin32_RegMask; - return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs; + return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask; } BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { @@ -247,9 +146,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R13); Reserved.set(PPC::R31); - if (!requiresRegisterScavenging(MF)) - Reserved.set(PPC::R0); // FIXME (64-bit): Remove - Reserved.set(PPC::X0); Reserved.set(PPC::X1); Reserved.set(PPC::X13); @@ -259,7 +155,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } - // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // Reserve X2 on Darwin to hack around the problem of save/restore of CR // when the stack frame is too big to address directly; we need two regs. // This is a hack. if (Subtarget.isDarwinABI()) { @@ -273,6 +169,29 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +unsigned +PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const unsigned DefaultSafety = 1; + + switch (RC->getID()) { + default: + return 0; + case PPC::G8RCRegClassID: + case PPC::GPRCRegClassID: { + unsigned FP = TFI->hasFP(MF) ? 1 : 0; + return 32 - FP - DefaultSafety; + } + case PPC::F8RCRegClassID: + case PPC::F4RCRegClassID: + case PPC::VRRCRegClassID: + return 32 - DefaultSafety; + case PPC::CRRCRegClassID: + return 8 - DefaultSafety; + } +} + //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// @@ -280,7 +199,8 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { void PPCRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) { + if (MF.getTarget().Options.GuaranteedTailCallOpt && + I->getOpcode() == PPC::ADJCALLSTACKUP) { // Add (actually subtract) back the amount the callee popped on return. if (int CalleeAmt = I->getOperand(1).getImm()) { bool is64Bit = Subtarget.isPPC64(); @@ -295,8 +215,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, DebugLoc dl = MI->getDebugLoc(); if (isInt<16>(CalleeAmt)) { - BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg). - addImm(CalleeAmt); + BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) + .addReg(StackReg, RegState::Kill) + .addImm(CalleeAmt); } else { MachineBasicBlock::iterator MBBI = I; BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) @@ -304,9 +225,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) .addReg(TmpReg, RegState::Kill) .addImm(CalleeAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(ADDInstr)) - .addReg(StackReg) - .addReg(StackReg) + BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) + .addReg(StackReg, RegState::Kill) .addReg(TmpReg); } } @@ -403,12 +323,12 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) .addReg(Reg, RegState::Kill) - .addReg(PPC::X1) + .addReg(PPC::X1, RegState::Define) .addReg(MI.getOperand(1).getReg()); else BuildMI(MBB, II, dl, TII.get(PPC::STDUX)) .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1) + .addReg(PPC::X1, RegState::Define) .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) @@ -424,7 +344,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, } else { BuildMI(MBB, II, dl, TII.get(PPC::STWUX)) .addReg(Reg, RegState::Kill) - .addReg(PPC::R1) + .addReg(PPC::R1, RegState::Define) .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) @@ -455,28 +375,32 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const { // Get the instruction. - MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI> + MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset> // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); DebugLoc dl = MI.getDebugLoc(); - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; - unsigned Reg = findScratchRegister(II, RS, RC, SPAdj); - unsigned SrcReg = MI.getOperand(0).getReg(); + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + bool LP64 = Subtarget.isPPC64(); + unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : + (LP64 ? PPC::X0 : PPC::R0); + unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg. - BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg) + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFCR8pseud : PPC::MFCRpseud), Reg) .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); // If the saved register wasn't CR0, shift the bits left so that they are in // CR0's slot. if (SrcReg != PPC::CR0) // rlwinm rA, rA, ShiftBits, 0, 31. - BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) .addReg(Reg, RegState::Kill) .addImm(getPPCRegisterNumbering(SrcReg) * 4) .addImm(0) @@ -490,6 +414,48 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, MBB.erase(II); } +void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex, int SPAdj, + RegScavenger *RS) const { + // Get the instruction. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + // FIXME: Once LLVM supports creating virtual registers here, or the register + // scavenger can return multiple registers, stop using reserved registers + // here. + (void) SPAdj; + (void) RS; + + bool LP64 = Subtarget.isPPC64(); + unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : + (LP64 ? PPC::X0 : PPC::R0); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_CR does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ), + Reg), FrameIndex); + + // If the reloaded register isn't CR0, shift the bits right so that they are + // in the right CR's slot. + if (DestReg != PPC::CR0) { + unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; + // rlwinm r11, r11, 32-ShiftBits, 0, 31. + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) + .addReg(Reg).addImm(32-ShiftBits).addImm(0) + .addImm(31); + } + + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg) + .addReg(Reg); + + // Discard the pseudo instruction. + MBB.erase(II); +} + void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { @@ -535,16 +501,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, return; } - // Special case for pseudo-op SPILL_CR. - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default. + // Special case for pseudo-ops SPILL_CR and RESTORE_CR. + if (requiresRegisterScavenging(MF)) { if (OpC == PPC::SPILL_CR) { lowerCRSpilling(II, FrameIndex, SPAdj, RS); return; + } else if (OpC == PPC::RESTORE_CR) { + lowerCRRestore(II, FrameIndex, SPAdj, RS); + return; } + } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). + + bool is64Bit = Subtarget.isPPC64(); MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ? - PPC::R31 : PPC::R1, + (is64Bit ? PPC::X31 : PPC::R31) : + (is64Bit ? PPC::X1 : PPC::R1), false); // Figure out if the offset in the instruction is shifted right two bits. This @@ -581,7 +554,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // clear can be encoded. This is extremely uncommon, because normally you // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. - if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) { + if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm + (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -590,19 +564,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - // FIXME: figure out what SPAdj is doing here. - // FIXME (64-bit): Use "findScratchRegister". unsigned SReg; - if (requiresRegisterScavenging(MF)) - SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj); - else - SReg = PPC::R0; + if (requiresRegisterScavenging(MF)) { + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj); + } else + SReg = is64Bit ? PPC::X0 : PPC::R0; // Insert a set of rA with the full offset value before the ld, st, or add - BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg) .addImm(Offset >> 16); - BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg) .addReg(SReg, RegState::Kill) .addImm(Offset); @@ -624,7 +598,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); - MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); + MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); } unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index 1cc7213..b1e6a72 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -1,4 +1,4 @@ -//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==// +//===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -37,8 +37,12 @@ public: /// This is used for addressing modes. virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + /// Code Generation virtual methods... - const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const unsigned *getCallPreservedMask(CallingConv::ID CC) const; BitVector getReservedRegs(const MachineFunction &MF) const; @@ -54,6 +58,8 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; + void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, + int SPAdj, RegScavenger *RS) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 1acdf4e..0e55313 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -1,10 +1,10 @@ -//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===// -// +//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h index a33e7e0..0b392f9 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h @@ -1,4 +1,4 @@ -//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===// +//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef PPC32RELOCATIONS_H -#define PPC32RELOCATIONS_H +#ifndef PPCRELOCATIONS_H +#define PPCRELOCATIONS_H #include "llvm/CodeGen/MachineRelocation.h" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td index 9664f14..8c0a858 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -1,10 +1,10 @@ -//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===// -// +//===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -50,7 +50,8 @@ def BrMCRX : InstrItinClass; def LdStDCBA : InstrItinClass; def LdStDCBF : InstrItinClass; def LdStDCBI : InstrItinClass; -def LdStGeneral : InstrItinClass; +def LdStLoad : InstrItinClass; +def LdStStore : InstrItinClass; def LdStDSS : InstrItinClass; def LdStICBI : InstrItinClass; def LdStUX : InstrItinClass; @@ -103,9 +104,11 @@ def VecVSR : InstrItinClass; // Processor instruction itineraries. include "PPCScheduleG3.td" +include "PPCSchedule440.td" include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" +include "PPCScheduleA2.td" //===----------------------------------------------------------------------===// // Instruction to itinerary class map - When add new opcodes to the supported @@ -149,8 +152,8 @@ include "PPCScheduleG5.td" // dcbf LdStDCBF // dcbi LdStDCBI // dcbst LdStDCBF -// dcbt LdStGeneral -// dcbtst LdStGeneral +// dcbt LdStLoad +// dcbtst LdStLoad // dcbz LdStDCBF // divd IntDivD // divdu IntDivD @@ -159,9 +162,9 @@ include "PPCScheduleG5.td" // dss LdStDSS // dst LdStDSS // dstst LdStDSS -// eciwx LdStGeneral -// ecowx LdStGeneral -// eieio LdStGeneral +// eciwx LdStLoad +// ecowx LdStLoad +// eieio LdStLoad // eqv IntGeneral // extsb IntGeneral // extsh IntGeneral @@ -201,10 +204,10 @@ include "PPCScheduleG5.td" // fsubs FPGeneral // icbi LdStICBI // isync SprISYNC -// lbz LdStGeneral -// lbzu LdStGeneral +// lbz LdStLoad +// lbzu LdStLoad // lbzux LdStUX -// lbzx LdStGeneral +// lbzx LdStLoad // ld LdStLD // ldarx LdStLDARX // ldu LdStLD @@ -222,11 +225,11 @@ include "PPCScheduleG5.td" // lhau LdStLHA // lhaux LdStLHA // lhax LdStLHA -// lhbrx LdStGeneral -// lhz LdStGeneral -// lhzu LdStGeneral +// lhbrx LdStLoad +// lhz LdStLoad +// lhzu LdStLoad // lhzux LdStUX -// lhzx LdStGeneral +// lhzx LdStLoad // lmw LdStLMW // lswi LdStLMW // lswx LdStLMW @@ -241,11 +244,11 @@ include "PPCScheduleG5.td" // lwarx LdStLWARX // lwaux LdStLHA // lwax LdStLHA -// lwbrx LdStGeneral -// lwz LdStGeneral -// lwzu LdStGeneral +// lwbrx LdStLoad +// lwz LdStLoad +// lwzu LdStLoad // lwzux LdStUX -// lwzx LdStGeneral +// lwzx LdStLoad // mcrf BrMCR // mcrfs FPGeneral // mcrxr BrMCRX @@ -306,10 +309,10 @@ include "PPCScheduleG5.td" // srawi IntShift // srd IntRotateD // srw IntGeneral -// stb LdStGeneral -// stbu LdStGeneral -// stbux LdStGeneral -// stbx LdStGeneral +// stb LdStStore +// stbu LdStStore +// stbux LdStStore +// stbx LdStStore // std LdStSTD // stdcx. LdStSTDCX // stdu LdStSTD @@ -324,11 +327,11 @@ include "PPCScheduleG5.td" // stfsu LdStUX // stfsux LdStUX // stfsx LdStUX -// sth LdStGeneral -// sthbrx LdStGeneral -// sthu LdStGeneral -// sthux LdStGeneral -// sthx LdStGeneral +// sth LdStStore +// sthbrx LdStStore +// sthu LdStStore +// sthux LdStStore +// sthx LdStStore // stmw LdStLMW // stswi LdStLMW // stswx LdStLMW @@ -337,12 +340,12 @@ include "PPCScheduleG5.td" // stvewx LdStSTVEBX // stvx LdStSTVEBX // stvxl LdStSTVEBX -// stw LdStGeneral -// stwbrx LdStGeneral +// stw LdStStore +// stwbrx LdStStore // stwcx. LdStSTWCX -// stwu LdStGeneral -// stwux LdStGeneral -// stwx LdStGeneral +// stwu LdStStore +// stwux LdStStore +// stwx LdStStore // subf IntGeneral // subfc IntGeneral // subfe IntGeneral diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td new file mode 100644 index 0000000..419faea --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td @@ -0,0 +1,616 @@ +//===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Primary reference: +// PowerPC 440x6 Embedded Processor Core User's Manual. +// IBM (as updated in) 2010. + +// The basic PPC 440 does not include a floating-point unit; the pipeline +// timings here are constructed to match the FP2 unit shipped with the +// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers. +// References: +// S. Chatterjee, et al. Design and exploitation of a high-performance +// SIMD floating-point unit for Blue Gene/L. +// IBM J. Res. & Dev. 49 (2/3) March/May 2005. +// also: +// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution: +// Blue Gene/P Application Development. +// IBM (as updated in) 2009. + +//===----------------------------------------------------------------------===// +// Functional units on the PowerPC 440/450 chip sets +// +def IFTH1 : FuncUnit; // Fetch unit 1 +def IFTH2 : FuncUnit; // Fetch unit 2 +def PDCD1 : FuncUnit; // Decode unit 1 +def PDCD2 : FuncUnit; // Decode unit 2 +def DISS1 : FuncUnit; // Issue unit 1 +def DISS2 : FuncUnit; // Issue unit 2 +def LRACC : FuncUnit; // Register access and dispatch for + // the simple integer (J-pipe) and + // load/store (L-pipe) pipelines +def IRACC : FuncUnit; // Register access and dispatch for + // the complex integer (I-pipe) pipeline +def FRACC : FuncUnit; // Register access and dispatch for + // the floating-point execution (F-pipe) pipeline +def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline +def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline +def IWB : FuncUnit; // Write-back unit for the I pipeline +def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline +def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline +def JWB : FuncUnit; // Write-back unit for the J pipeline +def AGEN : FuncUnit; // Address generation for the L pipeline +def CRD : FuncUnit; // D-cache access for the L pipeline +def LWB : FuncUnit; // Write-back unit for the L pipeline +def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline +def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline +def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline +def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline +def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline +def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline +def FWB : FuncUnit; // Write-back unit for the F pipeline + +def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used + // to make sure that no lwarx/stwcx. + // instructions are issued while another + // lwarx/stwcx. is in the L pipe. + +def GPR_Bypass : Bypass; // The bypass for general-purpose regs. +def FPR_Bypass : Bypass; // The bypass for floating-point regs. + +// Notes: +// Instructions are held in the FRACC, LRACC and IRACC pipeline +// stages until their source operands become ready. Exceptions: +// - Store instructions will hold in the AGEN stage +// - The integer multiply-accumulate instruction will hold in +// the IEXE1 stage +// +// For most I-pipe operations, the result is available at the end of +// the IEXE1 stage. Operations such as multiply and divide must +// continue to execute in IEXE2 and IWB. Divide resides in IWB for +// 33 cycles (multiply also calculates its result in IWB). For all +// J-pipe instructions, the result is available +// at the end of the JEXE1 stage. Loads have a 3-cycle latency +// (data is not available until after the LWB stage). +// +// The L1 cache hit latency is four cycles for floating point loads +// and three cycles for integer loads. +// +// The stwcx. instruction requires both the LRACC and the IRACC +// dispatch stages. It must be issued from DISS0. +// +// All lwarx/stwcx. instructions hold in LRACC if another +// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB. +// +// msync (a.k.a. sync) and mbar will hold in LWB until all load/store +// resources are empty. AGEN and CRD are held empty until the msync/mbar +// commits. +// +// Most floating-point instructions, computational and move, +// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that +// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above, +// loads take 4 cycles (for L1 hit). + +// +// This file defines the itinerary class data for the PPC 440 processor. +// +//===----------------------------------------------------------------------===// + + +def PPC440Itineraries : ProcessorItineraries< + [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC, + IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB, + FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold], + [GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC, LRACC]>, + InstrStage<1, [IEXE1, JEXE1]>, + InstrStage<1, [IEXE2, JEXE2]>, + InstrStage<1, [IWB, JWB]>], + [6, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC, LRACC]>, + InstrStage<1, [IEXE1, JEXE1]>, + InstrStage<1, [IEXE2, JEXE2]>, + InstrStage<1, [IWB, JWB]>], + [6, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<33, [IWB]>], + [40, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [7, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [7, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHW , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC, LRACC]>, + InstrStage<1, [IEXE1, JEXE1]>, + InstrStage<1, [IEXE2, JEXE2]>, + InstrStage<1, [IWB, JWB]>], + [6, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC, LRACC]>, + InstrStage<1, [IEXE1, JEXE1]>, + InstrStage<1, [IEXE2, JEXE2]>, + InstrStage<1, [IWB, JWB]>], + [6, 4, 4], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [6, 4], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4, 4], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [9, 5], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [9, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFDU , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [9, 5, 5], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHA , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<3, [AGEN], 1>, + InstrStage<2, [CRD], 1>, + InstrStage<1, [LWB]>]>, + InstrItinData<SprISYNC , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC], 0>, + InstrStage<1, [LRACC], 0>, + InstrStage<1, [IRACC]>, + InstrStage<1, [FEXE1], 0>, + InstrStage<1, [AGEN], 0>, + InstrStage<1, [JEXE1], 0>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [FEXE2], 0>, + InstrStage<1, [CRD], 0>, + InstrStage<1, [JEXE2], 0>, + InstrStage<1, [IEXE2]>, + InstrStage<6, [FEXE3], 0>, + InstrStage<6, [LWB], 0>, + InstrStage<6, [JWB], 0>, + InstrStage<6, [IWB]>]>, + InstrItinData<SprMFSR , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [6, 4], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTMSR , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [6, 4], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMTSR , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<3, [IWB]>], + [9, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>]>, + InstrItinData<SprMFCR , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [7, 4], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<SprMFSPR , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<3, [IWB]>], + [10, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<3, [IWB]>], + [10, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<3, [IWB]>], + [10, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSRIN , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<3, [IWB]>], + [10, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprRFI , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprSC , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [IRACC]>, + InstrStage<1, [IEXE1]>, + InstrStage<1, [IEXE2]>, + InstrStage<1, [IWB]>], + [8, 4], + [NoBypass, GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<25, [FWB]>], + [35, 4, 4], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<13, [FWB]>], + [23, 4, 4], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4, 4, 4], + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [FRACC]>, + InstrStage<1, [FEXE1]>, + InstrStage<1, [FEXE2]>, + InstrStage<1, [FEXE3]>, + InstrStage<1, [FEXE4]>, + InstrStage<1, [FEXE5]>, + InstrStage<1, [FEXE6]>, + InstrStage<1, [FWB]>], + [10, 4], + [FPR_Bypass, FPR_Bypass]> +]>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td new file mode 100644 index 0000000..857ba40 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td @@ -0,0 +1,652 @@ +//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Primary reference: +// A2 Processor User's Manual. +// IBM (as updated in) 2010. + +//===----------------------------------------------------------------------===// +// Functional units on the PowerPC A2 chip sets +// +def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1 +def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2 +def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3 +def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4 +def IU4_0 : FuncUnit; // Instruction buffer slot 1 +def IU4_1 : FuncUnit; // Instruction buffer slot 2 +def IU4_2 : FuncUnit; // Instruction buffer slot 3 +def IU4_3 : FuncUnit; // Instruction buffer slot 4 +def IU4_4 : FuncUnit; // Instruction buffer slot 5 +def IU4_5 : FuncUnit; // Instruction buffer slot 6 +def IU4_6 : FuncUnit; // Instruction buffer slot 7 +def IU4_7 : FuncUnit; // Instruction buffer slot 8 +def IU5 : FuncUnit; // Dependency resolution +def IU6 : FuncUnit; // Instruction issue +def RF0 : FuncUnit; +def XRF1 : FuncUnit; +def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline +def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline +def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline +def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline +def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline +def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline +def FRF1 : FuncUnit; +def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline +def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline +def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline +def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline +def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline +def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline + +def CR_Bypass : Bypass; // The bypass for condition regs. +//def GPR_Bypass : Bypass; // The bypass for general-purpose regs. +//def FPR_Bypass : Bypass; // The bypass for floating-point regs. + +// +// This file defines the itinerary class data for the PPC A2 processor. +// +//===----------------------------------------------------------------------===// + + +def PPCA2Itineraries : ProcessorItineraries< + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3, + IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7, + IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6, + FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntGeneral , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>], + [53, 7, 7], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStore , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStICBI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStUX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<LdStLFD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHA , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLMW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTWCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>, + InstrItinData<SprISYNC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, + InstrItinData<SprMFSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [GPR_Bypass, NoBypass]>, + InstrItinData<SprMTMSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, + InstrItinData<SprMFCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7], + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [GPR_Bypass, NoBypass]>, + InstrItinData<SprMFSPR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSRIN , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprRFI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprSC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [13, 7, 7], + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>, + InstrStage<71, [FEX1], 0>, + InstrStage<71, [FEX2], 0>, + InstrStage<71, [FEX3], 0>, + InstrStage<71, [FEX4], 0>, + InstrStage<71, [FEX5], 0>, + InstrStage<71, [FEX6]>], + [86, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>, + InstrStage<58, [FEX1], 0>, + InstrStage<58, [FEX2], 0>, + InstrStage<58, [FEX3], 0>, + InstrStage<58, [FEX4], 0>, + InstrStage<58, [FEX5], 0>, + InstrStage<58, [FEX6]>], + [73, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPSqrt , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>, + InstrStage<68, [FEX1], 0>, + InstrStage<68, [FEX2], 0>, + InstrStage<68, [FEX3], 0>, + InstrStage<68, [FEX4], 0>, + InstrStage<68, [FEX5], 0>, + InstrStage<68, [FEX6]>], + [86, 7], // FIXME: should be [86, 7] for double + // and [82, 7] for single. Likewise, + // the FEX? cycle count should be 68 + // for double and 64 for single. + [NoBypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7], + [FPR_Bypass, FPR_Bypass]> +]>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td index ad4da1f..bc926f7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td @@ -1,10 +1,10 @@ -//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===// -// +//===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the G3 (750) processor. @@ -32,7 +32,8 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td index 03c3b29..f7ec1e0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td @@ -1,10 +1,10 @@ -//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===// -// +//===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the G4 (7400) processor. @@ -31,7 +31,8 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>, InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td index 00cac3c..37ebfc5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -1,10 +1,10 @@ -//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===// -// +//===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the G4+ (7450) processor. @@ -34,7 +34,8 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td index 1671f22..d1e40ce 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td @@ -1,10 +1,10 @@ -//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===// -// +//===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the G5 (970) processor. @@ -35,7 +35,8 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>, InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index cf194de..f405b47 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -1,4 +1,4 @@ -//===- PowerPCSubtarget.cpp - PPC Subtarget Information -------------------===// +//===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===// // // The LLVM Compiler Infrastructure // @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PPCSubtarget.h" +#include "PPCRegisterInfo.h" #include "PPC.h" #include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" @@ -74,6 +75,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , HasAltivec(false) , HasFSQRT(false) , HasSTFIWX(false) + , IsBookE(false) , HasLazyResolverStubs(false) , IsJITCodeModel(false) , TargetTriple(TT) { @@ -139,3 +141,23 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV, return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || GV->hasCommonLinkage() || isDecl; } + +bool PPCSubtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + if (DarwinDirective == PPC::DIR_440 || DarwinDirective == PPC::DIR_A2) + Mode = TargetSubtargetInfo::ANTIDEP_ALL; + else + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + + CriticalPathRCs.clear(); + + if (isPPC64()) + CriticalPathRCs.push_back(&PPC::G8RCRegClass); + else + CriticalPathRCs.push_back(&PPC::GPRCRegClass); + + return OptLevel >= CodeGenOpt::Default; +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h index e028de6..a275029 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -1,4 +1,4 @@ -//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====// +//===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -33,12 +33,14 @@ namespace PPC { enum { DIR_NONE, DIR_32, + DIR_440, DIR_601, DIR_602, DIR_603, DIR_7400, DIR_750, DIR_970, + DIR_A2, DIR_64 }; } @@ -66,6 +68,7 @@ protected: bool HasAltivec; bool HasFSQRT; bool HasSTFIWX; + bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; @@ -136,15 +139,22 @@ public: bool hasSTFIWX() const { return HasSTFIWX; } bool hasAltivec() const { return HasAltivec; } bool isGigaProcessor() const { return IsGigaProcessor; } + bool isBookE() const { return IsBookE; } const Triple &getTargetTriple() const { return TargetTriple; } /// isDarwin - True if this is any darwin platform. bool isDarwin() const { return TargetTriple.isMacOSX(); } + /// isBGP - True if this is a BG/P platform. + bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; } bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } + /// enablePostRAScheduler - True at 'More' optimization. + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index f5744b8..d113976 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "PPCTargetMachine.h" +#include "PPC.h" #include "llvm/PassManager.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" @@ -22,37 +23,46 @@ using namespace llvm; extern "C" void LLVMInitializePowerPCTarget() { // Register the targets - RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); + RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target); } PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { + + // The binutils for the BG/P are too old for CFI. + if (Subtarget.isBGP()) + setMCUseCFI(false); } -/// Override this for PowerPC. Tail merging happily breaks up instruction issue -/// groups, which typically degrades performance. -bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } +void PPC32TargetMachine::anchor() { } -PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, +PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) { + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { } +void PPC64TargetMachine::anchor() { } -PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, +PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) { + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { } @@ -60,33 +70,56 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, // Pass Pipeline Configuration //===----------------------------------------------------------------------===// -bool PPCTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +namespace { +/// PPC Code Generator Pass Configuration Options. +class PPCPassConfig : public TargetPassConfig { +public: + PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + PPCTargetMachine &getPPCTargetMachine() const { + return getTM<PPCTargetMachine>(); + } + + virtual bool addInstSelector(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { + TargetPassConfig *PassConfig = new PPCPassConfig(this, PM); + + // Override this for PowerPC. Tail merging happily breaks up instruction issue + // groups, which typically degrades performance. + PassConfig->setEnableTailMerge(false); + + return PassConfig; +} + +bool PPCPassConfig::addInstSelector() { // Install an instruction selector. - PM.add(createPPCISelDag(*this)); + PM.add(createPPCISelDag(getPPCTargetMachine())); return false; } -bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool PPCPassConfig::addPreEmitPass() { // Must run branch selection immediately preceding the asm printer. PM.add(createPPCBranchSelectionPass()); return false; } bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // FIXME: This should be moved to TargetJITInfo!! if (Subtarget.isPPC64()) // Temporary workaround for the inability of PPC64 JIT to handle jump // tables. - DisableJumpTables = true; - + Options.DisableJumpTables = true; + // Inform the subtarget that we are in JIT mode. FIXME: does this break macho // writing? Subtarget.SetJITMode(); - + // Machine code emitter pass for PowerPC. PM.add(createPPCJITCodeEmitterPass(*this, JCE)); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h index d06f084..7da2b0c 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -1,4 +1,4 @@ -//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=// +//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -24,8 +24,6 @@ #include "llvm/Target/TargetData.h" namespace llvm { -class PassManager; -class GlobalValue; /// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets. /// @@ -41,15 +39,16 @@ class PPCTargetMachine : public LLVMTargetMachine { public: PPCTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM, bool is64Bit); + StringRef CPU, StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit); virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; } virtual PPCJITInfo *getJITInfo() { return &JITInfo; } - virtual const PPCTargetLowering *getTargetLowering() const { + virtual const PPCTargetLowering *getTargetLowering() const { return &TLInfo; } virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const { @@ -58,37 +57,39 @@ public: virtual const PPCRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - + virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const InstrItineraryData *getInstrItineraryData() const { + virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); - virtual bool getEnableTailMergeDefault() const; }; /// PPC32TargetMachine - PowerPC 32-bit target machine. /// class PPC32TargetMachine : public PPCTargetMachine { + virtual void anchor(); public: PPC32TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + StringRef CPU, StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// PPC64TargetMachine - PowerPC 64-bit target machine. /// class PPC64TargetMachine : public PPCTargetMachine { + virtual void anchor(); public: PPC64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + StringRef CPU, StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; } // end namespace llvm |