diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
45 files changed, 4642 insertions, 2340 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 3d58306..bacc108 100644 --- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -13,7 +13,7 @@ #define DEBUG_TYPE "asm-printer" #include "PPCInstPrinter.h" -#include "MCTargetDesc/PPCBaseInfo.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -87,35 +87,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier) { unsigned Code = MI->getOperand(OpNo).getImm(); - if (!Modifier) { - unsigned CCReg = MI->getOperand(OpNo+1).getReg(); - unsigned RegNo; - switch (CCReg) { - default: llvm_unreachable("Unknown CR register"); - case PPC::CR0: RegNo = 0; break; - case PPC::CR1: RegNo = 1; break; - case PPC::CR2: RegNo = 2; break; - case PPC::CR3: RegNo = 3; break; - case PPC::CR4: RegNo = 4; break; - case PPC::CR5: RegNo = 5; break; - case PPC::CR6: RegNo = 6; break; - case PPC::CR7: RegNo = 7; break; - } - - // Print the CR bit number. The Code is ((BI << 5) | BO) for a - // BCC, but we must have the positive form here (BO == 12) - unsigned BI = Code >> 5; - assert((Code & 0xF) == 12 && - "BO in predicate bit must have the positive form"); - - unsigned Value = 4*RegNo + BI; - O << Value; - return; - } if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { - case PPC::PRED_ALWAYS: return; // Don't print anything for always. case PPC::PRED_LT: O << "lt"; return; case PPC::PRED_LE: O << "le"; return; case PPC::PRED_EQ: O << "eq"; return; @@ -129,8 +103,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, assert(StringRef(Modifier) == "reg" && "Need to specify 'cc' or 'reg' as predicate op modifier!"); - // Don't print the register for 'always'. 
- if (Code == PPC::PRED_ALWAYS) return; printOperand(MI, OpNo+1, O); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 87ecb13..ec26574 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -13,8 +13,8 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" @@ -30,11 +30,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_Data_2: case FK_Data_4: case FK_Data_8: - case PPC::fixup_ppc_toc: + case PPC::fixup_ppc_tlsreg: + case PPC::fixup_ppc_nofixup: return Value; - case PPC::fixup_ppc_lo14: - case PPC::fixup_ppc_toc16_ds: - return (Value & 0xffff) << 2; case PPC::fixup_ppc_brcond14: return Value & 0xfffc; case PPC::fixup_ppc_br24: @@ -46,8 +44,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_ha16: return ((Value >> 16) + ((Value & 0x8000) ? 
1 : 0)) & 0xffff; case PPC::fixup_ppc_lo16: - case PPC::fixup_ppc_toc16: return Value & 0xffff; + case PPC::fixup_ppc_lo16_ds: + return Value & 0xfffc; } } @@ -61,7 +60,9 @@ public: void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) {} + MCValue Target, uint64_t &FixedValue) { + llvm_unreachable("Relocation emission for MachO/PPC unimplemented!"); + } }; class PPCAsmBackend : public MCAsmBackend { @@ -78,10 +79,9 @@ public: { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_lo16", 16, 16, 0 }, { "fixup_ppc_ha16", 16, 16, 0 }, - { "fixup_ppc_lo14", 16, 14, 0 }, - { "fixup_ppc_toc", 0, 64, 0 }, - { "fixup_ppc_toc16", 16, 16, 0 }, - { "fixup_ppc_toc16_ds", 16, 14, 0 } + { "fixup_ppc_lo16_ds", 16, 14, 0 }, + { "fixup_ppc_tlsreg", 0, 0, 0 }, + { "fixup_ppc_nofixup", 0, 0, 0 } }; if (Kind < FirstTargetFixupKind) @@ -92,6 +92,20 @@ public: return Infos[Kind - FirstTargetFixupKind]; } + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const { + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + + // For each byte of the fragment that the fixup touches, mask in the bits + // from the fixup value. The Value has been "split up" into the appropriate + // bitfields above. + for (unsigned i = 0; i != 4; ++i) + Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); + } + bool mayNeedRelaxation(const MCInst &Inst) const { // FIXME. return false; @@ -99,7 +113,7 @@ public: bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { // FIXME. 
llvm_unreachable("relaxInstruction() unimplemented"); @@ -135,11 +149,6 @@ namespace { public: DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const { - llvm_unreachable("UNIMP"); - } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; return createMachObjectWriter(new PPCMachObjectWriter( @@ -161,19 +170,6 @@ namespace { ELFPPCAsmBackend(const Target &T, uint8_t OSABI) : PPCAsmBackend(T), OSABI(OSABI) { } - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const { - Value = adjustFixupValue(Fixup.getKind(), Value); - if (!Value) return; // Doesn't change encoding. - - unsigned Offset = Fixup.getOffset(); - - // For each byte of the fragment that the fixup touches, mask in the bits from - // the fixup value. The Value has been "split up" into the appropriate - // bitfields above. - for (unsigned i = 0; i != 4; ++i) - Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff); - } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { bool is64 = getPointerSize() == 8; diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h deleted file mode 100644 index 9c975c0..0000000 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h +++ /dev/null @@ -1,70 +0,0 @@ -//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone helper functions and enum definitions for -// the PPC target useful for the compiler back-end and the MC libraries. 
-// As such, it deliberately does not include references to LLVM core -// code gen types, passes, etc.. -// -//===----------------------------------------------------------------------===// - -#ifndef PPCBASEINFO_H -#define PPCBASEINFO_H - -#include "PPCMCTargetDesc.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -/// getPPCRegisterNumbering - Given the enum value for some register, e.g. -/// PPC::F14, return the number that it corresponds to (e.g. 14). -inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) { - using namespace PPC; - switch (RegEnum) { - case 0: return 0; - case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0; - case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1; - case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2; - case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3; - case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4; - case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5; - case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6; - case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7; - case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8; - case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9; - case R10: case X10: case F10: case V10: case CR2EQ: return 10; - case R11: case X11: case F11: case V11: case CR2UN: return 11; - case R12: case X12: case F12: case V12: case CR3LT: return 12; - case R13: case X13: case F13: case V13: case CR3GT: return 13; - case R14: case X14: case F14: case V14: case CR3EQ: return 14; - case R15: case X15: case F15: case V15: case CR3UN: return 15; - case R16: case X16: case F16: case V16: case CR4LT: return 16; - case R17: case X17: case F17: case V17: case CR4GT: return 17; - case R18: case X18: case F18: case V18: case CR4EQ: return 18; - case R19: case X19: case F19: case V19: case CR4UN: return 19; - case R20: case X20: 
case F20: case V20: case CR5LT: return 20; - case R21: case X21: case F21: case V21: case CR5GT: return 21; - case R22: case X22: case F22: case V22: case CR5EQ: return 22; - case R23: case X23: case F23: case V23: case CR5UN: return 23; - case R24: case X24: case F24: case V24: case CR6LT: return 24; - case R25: case X25: case F25: case V25: case CR6GT: return 25; - case R26: case X26: case F26: case V26: case CR6EQ: return 26; - case R27: case X27: case F27: case V27: case CR6UN: return 27; - case R28: case X28: case F28: case V28: case CR7LT: return 28; - case R29: case X29: case F29: case V29: case CR7GT: return 29; - case R30: case X30: case F30: case V30: case CR7EQ: return 30; - case R31: case X31: case F31: case V31: case CR7UN: return 31; - default: - llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!"); - } -} - -} // end namespace llvm; - -#endif diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index dc93f71..84e4175 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCFixupKinds.h" #include "MCTargetDesc/PPCMCTargetDesc.h" +#include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -33,9 +34,25 @@ namespace { const MCFixup &Fixup, bool IsPCRel) const; virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset); + + virtual void sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs); + }; + + class PPCELFRelocationEntry : public ELFRelocationEntry { + 
public: + PPCELFRelocationEntry(const ELFRelocationEntry &RE); + bool operator<(const PPCELFRelocationEntry &RE) const { + return (RE.r_offset < r_offset || + (RE.r_offset == r_offset && RE.Type > Type)); + } }; } +PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE) + : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol, + RE.r_addend, *RE.Fixup) {} + PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) : MCELFObjectTargetWriter(Is64Bit, OSABI, Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC, @@ -60,9 +77,14 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case PPC::fixup_ppc_br24: Type = ELF::R_PPC_REL24; break; + case FK_Data_4: case FK_PCRel_4: Type = ELF::R_PPC_REL32; break; + case FK_Data_8: + case FK_PCRel_8: + Type = ELF::R_PPC64_REL64; + break; } } else { switch ((unsigned)Fixup.getKind()) { @@ -79,9 +101,24 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TPREL16_HA: Type = ELF::R_PPC_TPREL16_HA; break; + case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: + Type = ELF::R_PPC64_DTPREL16_HA; + break; case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC_ADDR16_HA; break; + case MCSymbolRefExpr::VK_PPC_TOC16_HA: + Type = ELF::R_PPC64_TOC16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA: + Type = ELF::R_PPC64_GOT_TPREL16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA: + Type = ELF::R_PPC64_GOT_TLSGD16_HA; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA: + Type = ELF::R_PPC64_GOT_TLSLD16_HA; + break; } break; case PPC::fixup_ppc_lo16: @@ -90,22 +127,56 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_PPC_TPREL16_LO: Type = ELF::R_PPC_TPREL16_LO; break; + case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: + Type = ELF::R_PPC64_DTPREL16_LO; + break; case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC_ADDR16_LO; break; + case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: + Type = ELF::R_PPC64_TOC16; + break; 
+ case MCSymbolRefExpr::VK_PPC_TOC16_LO: + Type = ELF::R_PPC64_TOC16_LO; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO: + Type = ELF::R_PPC64_GOT_TLSGD16_LO; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: + Type = ELF::R_PPC64_GOT_TLSLD16_LO; + break; } break; - case PPC::fixup_ppc_lo14: - Type = ELF::R_PPC_ADDR14; - break; - case PPC::fixup_ppc_toc: - Type = ELF::R_PPC64_TOC; + case PPC::fixup_ppc_lo16_ds: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_PPC64_ADDR16_DS; + break; + case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: + Type = ELF::R_PPC64_TOC16_DS; + break; + case MCSymbolRefExpr::VK_PPC_TOC16_LO: + Type = ELF::R_PPC64_TOC16_LO_DS; + break; + case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO: + Type = ELF::R_PPC64_GOT_TPREL16_LO_DS; + break; + } break; - case PPC::fixup_ppc_toc16: - Type = ELF::R_PPC64_TOC16; + case PPC::fixup_ppc_tlsreg: + Type = ELF::R_PPC64_TLS; break; - case PPC::fixup_ppc_toc16_ds: - Type = ELF::R_PPC64_TOC16_DS; + case PPC::fixup_ppc_nofixup: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_PPC_TLSGD: + Type = ELF::R_PPC64_TLSGD; + break; + case MCSymbolRefExpr::VK_PPC_TLSLD: + Type = ELF::R_PPC64_TLSLD; + break; + } break; case FK_Data_8: switch (Modifier) { @@ -162,8 +233,7 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { switch ((unsigned)Fixup.getKind()) { case PPC::fixup_ppc_ha16: case PPC::fixup_ppc_lo16: - case PPC::fixup_ppc_toc16: - case PPC::fixup_ppc_toc16_ds: + case PPC::fixup_ppc_lo16_ds: RelocOffset += 2; break; default: @@ -171,6 +241,34 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { } } +// The standard sorter only sorts on the r_offset field, but PowerPC can +// have multiple relocations at the same offset. Sort secondarily on the +// relocation type to avoid nondeterminism. 
+void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) { + + // Copy to a temporary vector of relocation entries having a different + // sort function. + std::vector<PPCELFRelocationEntry> TmpRelocs; + + for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin(); + R != Relocs.end(); ++R) { + TmpRelocs.push_back(PPCELFRelocationEntry(*R)); + } + + // Sort in place by ascending r_offset and descending r_type. + array_pod_sort(TmpRelocs.begin(), TmpRelocs.end()); + + // Copy back to the original vector. + unsigned I = 0; + for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin(); + R != TmpRelocs.end(); ++R, ++I) { + Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type, + R->Symbol, R->r_addend, *R->Fixup); + } +} + + MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS, bool Is64Bit, uint8_t OSABI) { diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 37b265e..86c44f5 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -12,6 +12,8 @@ #include "llvm/MC/MCFixup.h" +#undef PPC + namespace llvm { namespace PPC { enum Fixups { @@ -31,19 +33,16 @@ enum Fixups { /// like 'lis'. fixup_ppc_ha16, - /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs - /// like 'std'. - fixup_ppc_lo14, - - /// fixup_ppc_toc - Insert value of TOC base (.TOC.). - fixup_ppc_toc, + /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with + /// implied 2 zero bits for instrs like 'std'. + fixup_ppc_lo16_ds, - /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base. - fixup_ppc_toc16, + /// fixup_ppc_tlsreg - Insert thread-pointer register number. 
+ fixup_ppc_tlsreg, - /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with - /// implied 2 zero bits - fixup_ppc_toc16_ds, + /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call + /// to __tls_get_addr for the TLS general and local dynamic models. + fixup_ppc_nofixup, // Marker LastTargetFixupKind, diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 215aa40..a25d7fe 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -17,8 +17,9 @@ using namespace llvm; void PPCMCAsmInfoDarwin::anchor() { } PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { - if (is64Bit) - PointerSize = 8; + if (is64Bit) { + PointerSize = CalleeSaveStackSlotSize = 8; + } IsLittleEndian = false; PCSymbol = "."; @@ -35,8 +36,9 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { void PPCLinuxMCAsmInfo::anchor() { } PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { - if (is64Bit) - PointerSize = 8; + if (is64Bit) { + PointerSize = CalleeSaveStackSlotSize = 8; + } IsLittleEndian = false; // ".comm align is in bytes but .align is pow-2." 
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 2118302..2223cd6 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -12,15 +12,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" -#include "MCTargetDesc/PPCBaseInfo.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); @@ -31,24 +33,17 @@ class PPCMCCodeEmitter : public MCCodeEmitter { void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCSubtargetInfo &STI; + const MCContext &CTX; Triple TT; public: PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, MCContext &ctx) - : STI(sti), TT(STI.getTargetTriple()) { + : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) { } ~PPCMCCodeEmitter() {} - bool is64BitMode() const { - return (STI.getFeatureBits() & PPC::Feature64Bit) != 0; - } - - bool isSVR4ABI() const { - return TT.isMacOSX() == 0; - } - unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, @@ -61,6 +56,8 @@ public: SmallVectorImpl<MCFixup> &Fixups) const; unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; + unsigned 
getTLSRegEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const; unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; @@ -77,11 +74,11 @@ public: SmallVectorImpl<MCFixup> &Fixups) const { uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); - // BL8_NOPELF and BLA8_NOP_ELF is both size of 8 bacause of the - // following 'nop'. + // BL8_NOP etc. all have a size of 8 because of the following 'nop'. unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF) + if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP || + Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) Size = 8; // Output the constant in big endian byte order. @@ -114,6 +111,17 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, // Add a fixup for the branch target. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_br24)); + + // For special TLS calls, add another fixup for the symbol. Apparently + // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently + // similar that TblGen will not generate a separate case for the latter + // two, so this is the only way to get the extra fixup generated. + unsigned Opcode = MI.getOpcode(); + if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) { + const MCOperand &MO2 = MI.getOperand(OpNo+1); + Fixups.push_back(MCFixup::Create(0, MO2.getExpr(), + (MCFixupKind)PPC::fixup_ppc_nofixup)); + } return 0; } @@ -162,12 +170,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. 
- if (isSVR4ABI() && is64BitMode()) - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_toc16)); - else - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo16)); + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16)); return RegBits; } @@ -183,17 +187,26 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, if (MO.isImm()) return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; - // Add a fixup for the branch target. - if (isSVR4ABI() && is64BitMode()) - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_toc16_ds)); - else - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo14)); + // Add a fixup for the displacement field. + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16_ds)); return RegBits; } +unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups); + + // Add a fixup for the TLS register, which simply provides a relocation + // hint to the linker that this statement is part of a relocation sequence. + // Return the thread-pointer register's encoding. 
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_tlsreg)); + return CTX.getRegisterInfo().getEncodingValue(PPC::X13); +} + unsigned PPCMCCodeEmitter:: get_crbitm_encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { @@ -202,7 +215,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo, MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MTCRF8) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> getPPCRegisterNumbering(MO.getReg()); + return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg()); } @@ -214,7 +227,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, // The GPR operand should come through here though. assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return getPPCRegisterNumbering(MO.getReg()); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); } assert(MO.isImm() && diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 4c2578d..2209f93 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -12,14 +12,14 @@ //===----------------------------------------------------------------------===// #include "PPCMCTargetDesc.h" -#include "PPCMCAsmInfo.h" #include "InstPrinter/PPCInstPrinter.h" -#include "llvm/MC/MachineLocation.h" +#include "PPCMCAsmInfo.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -88,6 +88,11 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, else RM = Reloc::Static; } + if (CM == 
CodeModel::Default) { + Triple T(TT); + if (!T.isOSDarwin() && T.getArch() == Triple::ppc64) + CM = CodeModel::Medium; + } X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index a0e4cf3..38a7420 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -14,6 +14,9 @@ #ifndef PPCMCTARGETDESC_H #define PPCMCTARGETDESC_H +// GCC #defines PPC on Linux but we use it as our namespace name +#undef PPC + #include "llvm/Support/DataTypes.h" namespace llvm { @@ -44,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, uint8_t OSABI); } // End llvm namespace +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + // Defines symbolic names for PowerPC registers. This defines a mapping from // register name to register number. 
// diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 12bb0a1..d84eb9c 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -18,7 +18,6 @@ using namespace llvm; PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { switch (Opcode) { - default: llvm_unreachable("Unknown PPC branch opcode!"); case PPC::PRED_EQ: return PPC::PRED_NE; case PPC::PRED_NE: return PPC::PRED_EQ; case PPC::PRED_LT: return PPC::PRED_GE; @@ -28,4 +27,5 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { case PPC::PRED_NU: return PPC::PRED_UN; case PPC::PRED_UN: return PPC::PRED_NU; } + llvm_unreachable("Unknown PPC branch opcode!"); } diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index f872e86..ad2b018 100644 --- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -14,11 +14,17 @@ #ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H #define LLVM_TARGET_POWERPC_PPCPREDICATES_H +// GCC #defines PPC on Linux but we use it as our namespace name +#undef PPC + +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. 
enum Predicate { - PRED_ALWAYS = (0 << 5) | 20, PRED_LT = (0 << 5) | 12, PRED_LE = (1 << 5) | 4, PRED_EQ = (2 << 5) | 12, diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h index 9103e12..446b685 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.h +++ b/contrib/llvm/lib/Target/PowerPC/PPC.h @@ -15,7 +15,6 @@ #ifndef LLVM_TARGET_POWERPC_H #define LLVM_TARGET_POWERPC_H -#include "MCTargetDesc/PPCBaseInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include <string> @@ -25,6 +24,7 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; + class ImmutablePass; class JITCodeEmitter; class MachineInstr; class AsmPrinter; @@ -37,6 +37,9 @@ namespace llvm { JITCodeEmitter &MCE); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); + + /// \brief Creates an PPC-specific Target Transformation Info pass. + ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM); namespace PPCII { @@ -53,25 +56,32 @@ namespace llvm { /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to /// the function's picbase, e.g. lo16(symbol-picbase). - MO_PIC_FLAG = 4, + MO_PIC_FLAG = 2, /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase). - MO_NLP_FLAG = 8, + MO_NLP_FLAG = 4, /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a /// symbol with hidden visibility. This causes a different kind of /// non-lazy-pointer to be generated. - MO_NLP_HIDDEN_FLAG = 16, + MO_NLP_HIDDEN_FLAG = 8, /// The next are not flags but distinct values. 
- MO_ACCESS_MASK = 224, + MO_ACCESS_MASK = 0xf0, /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol) - MO_LO16 = 32, MO_HA16 = 64, + MO_LO16 = 1 << 4, + MO_HA16 = 2 << 4, + + MO_TPREL16_HA = 3 << 4, + MO_TPREL16_LO = 4 << 4, - MO_TPREL16_HA = 96, - MO_TPREL16_LO = 128 + /// These values identify relocations on immediates folded + /// into memory operations. + MO_DTPREL16_LO = 5 << 4, + MO_TLSLD16_LO = 6 << 4, + MO_TOC16_LO = 7 << 4 }; } // end namespace PPCII diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td index cb15dad..3892162 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPC.td +++ b/contrib/llvm/lib/Target/PowerPC/PPC.td @@ -39,7 +39,12 @@ def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E500mc", "">; def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_E5500", "">; +def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">; +def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">; +def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">; +def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">; def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; +def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", @@ -52,12 +57,42 @@ def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", "Enable the MFOCRF instruction">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; +def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true", + "Enable the fre instruction">; +def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true", + "Enable the fres instruction">; +def FeatureFRSQRTE : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true", + 
"Enable the frsqrte instruction">; +def FeatureFRSQRTES : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true", + "Enable the frsqrtes instruction">; +def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true", + "Assume higher precision reciprocal estimates">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", "Enable the stfiwx instruction">; +def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true", + "Enable the lfiwax instruction">; +def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true", + "Enable the fri[mnpz] instructions">; +def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true", + "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">; def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; +def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", + "Enable the popcnt[dw] instructions">; +def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", + "Enable the ldbrx instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; +def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", + "Enable QPX instructions">; + +// Note: Future features to add when support is extended to more +// recent ISA levels: +// +// CMPB p6, p6x, p7 cmpb +// DFP p6, p6x, p7 decimal floating-point instructions +// POPCNTB p5 through p7 popcntb and related instructions +// VSX p7 vector-scalar instruction set //===----------------------------------------------------------------------===// // Register File Description @@ -73,30 +108,46 @@ include "PPCInstrInfo.td" def : Processor<"generic", G3Itineraries, [Directive32]>; def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL, + FeatureFRES, FeatureFRSQRTE, FeatureBookE]>; def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL, + FeatureFRES, FeatureFRSQRTE, FeatureBookE]>; def : Processor<"601", G3Itineraries, 
[Directive601]>; def : Processor<"602", G3Itineraries, [Directive602]>; -def : Processor<"603", G3Itineraries, [Directive603]>; -def : Processor<"603e", G3Itineraries, [Directive603]>; -def : Processor<"603ev", G3Itineraries, [Directive603]>; -def : Processor<"604", G3Itineraries, [Directive604]>; -def : Processor<"604e", G3Itineraries, [Directive604]>; -def : Processor<"620", G3Itineraries, [Directive620]>; -def : Processor<"750", G4Itineraries, [Directive750]>; -def : Processor<"g3", G3Itineraries, [Directive750]>; -def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec]>; -def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec]>; -def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec]>; -def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec]>; -def : Processor<"970", G5Itineraries, +def : Processor<"603", G3Itineraries, [Directive603, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"603e", G3Itineraries, [Directive603, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"603ev", G3Itineraries, [Directive603, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"604", G3Itineraries, [Directive604, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"604e", G3Itineraries, [Directive604, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"620", G3Itineraries, [Directive620, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"750", G4Itineraries, [Directive750, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"g3", G3Itineraries, [Directive750, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec, + FeatureFRES, FeatureFRSQRTE]>; +def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec, + FeatureFRES, FeatureFRSQRTE]>; +def : 
ProcessorModel<"970", G5Model, [Directive970, FeatureAltivec, - FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, + FeatureMFOCRF, FeatureFSqrt, + FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; -def : Processor<"g5", G5Itineraries, +def : ProcessorModel<"g5", G5Model, [Directive970, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, + FeatureFRES, FeatureFRSQRTE, Feature64Bit /*, Feature64BitRegs */]>; def : ProcessorModel<"e500mc", PPCE500mcModel, [DirectiveE500mc, FeatureMFOCRF, @@ -104,23 +155,65 @@ def : ProcessorModel<"e500mc", PPCE500mcModel, def : ProcessorModel<"e5500", PPCE5500Model, [DirectiveE5500, FeatureMFOCRF, Feature64Bit, FeatureSTFIWX, FeatureBookE, FeatureISEL]>; -def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, - FeatureMFOCRF, FeatureFSqrt, - FeatureSTFIWX, FeatureISEL, - Feature64Bit - /*, Feature64BitRegs */]>; -def : Processor<"pwr6", G5Itineraries, +def : ProcessorModel<"a2", PPCA2Model, + [DirectiveA2, FeatureBookE, FeatureMFOCRF, + FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, + FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */]>; +def : ProcessorModel<"a2q", PPCA2Model, + [DirectiveA2, FeatureBookE, FeatureMFOCRF, + FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, + FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */, FeatureQPX]>; +def : ProcessorModel<"pwr3", G5Model, + [DirectivePwr3, FeatureAltivec, + FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF, + FeatureSTFIWX, Feature64Bit]>; +def : ProcessorModel<"pwr4", G5Model, + [DirectivePwr4, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureFRES, FeatureFRSQRTE, + FeatureSTFIWX, Feature64Bit]>; +def : ProcessorModel<"pwr5", G5Model, + [DirectivePwr5, 
FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFRSQRTE, FeatureFRSQRTES, + FeatureSTFIWX, Feature64Bit]>; +def : ProcessorModel<"pwr5x", G5Model, + [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFRSQRTE, FeatureFRSQRTES, + FeatureSTFIWX, FeatureFPRND, Feature64Bit]>; +def : ProcessorModel<"pwr6", G5Model, [DirectivePwr6, FeatureAltivec, - FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - Feature64Bit /*, Feature64BitRegs */]>; -def : Processor<"pwr7", G5Itineraries, + FeatureMFOCRF, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>; +def : ProcessorModel<"pwr6x", G5Model, + [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, + FeatureFSqrt, FeatureFRE, FeatureFRES, + FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, + FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, Feature64Bit]>; +def : ProcessorModel<"pwr7", G5Model, [DirectivePwr7, FeatureAltivec, - FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>; + FeatureMFOCRF, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; -def : Processor<"ppc64", G5Itineraries, +def : ProcessorModel<"ppc64", G5Model, [Directive64, FeatureAltivec, - FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, + FeatureMFOCRF, FeatureFSqrt, FeatureFRES, + FeatureFRSQRTE, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 15d690b..96a9f0a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ 
b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -18,14 +18,13 @@ #define DEBUG_TYPE "asmprinter" #include "PPC.h" -#include "PPCTargetMachine.h" -#include "PPCSubtarget.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCPredicates.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" +#include "PPCSubtarget.h" +#include "PPCTargetMachine.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -33,28 +32,30 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/ELF.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/MapVector.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" +#include 
"llvm/Target/TargetRegisterInfo.h" using namespace llvm; namespace { @@ -72,6 +73,7 @@ namespace { return "PowerPC Assembly Printer"; } + MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); virtual void EmitInstruction(const MachineInstr *MI); @@ -309,6 +311,25 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, } +/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry +/// exists for it. If not, create one. Then return a symbol that references +/// the TOC entry. +MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { + + MCSymbol *&TOCEntry = TOC[Sym]; + + // To avoid name clash check if the name already exists. + while (TOCEntry == 0) { + if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) + + "C" + Twine(TOCLabelID++)) == 0) { + TOCEntry = GetTempSymbol("C", TOCLabelID); + } + } + + return TOCEntry; +} + + /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to /// the current output stream. /// @@ -349,14 +370,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *PICBase = MF->getPICBaseSymbol(); // Emit the 'bl'. - TmpInst.setOpcode(PPC::BL_Darwin); // Darwin vs SVR4 doesn't matter here. - - - // FIXME: We would like an efficient form for this, so we don't have to do - // a lot of extra uniquing. - TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr:: - Create(PICBase, OutContext))); - OutStreamer.EmitInstruction(TmpInst); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL) + // FIXME: We would like an efficient form for this, so we don't have to do + // a lot of extra uniquing. + .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); // Emit the label. 
OutStreamer.EmitLabel(PICBase); @@ -382,14 +399,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); - MCSymbol *&TOCEntry = TOC[MOSymbol]; - // To avoid name clash check if the name already exists. - while (TOCEntry == 0) { - if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) + - "C" + Twine(TOCLabelID++)) == 0) { - TOCEntry = GetTempSymbol("C", TOCLabelID); - } - } + + MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY, @@ -399,15 +410,299 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + case PPC::ADDIStocHA: { + // Transform %Xd = ADDIStocHA %X2, <ga:@sym> + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + + // Change the opcode to ADDIS8. If the global address is external, + // has common linkage, is a function address, or is a jump table + // address, then generate a TOC entry and reference that. Otherwise + // reference the symbol directly. + TmpInst.setOpcode(PPC::ADDIS8); + const MachineOperand &MO = MI->getOperand(2); + assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) && + "Invalid operand for ADDIStocHA!"); + MCSymbol *MOSymbol = 0; + bool IsExternal = false; + bool IsFunction = false; + bool IsCommon = false; + bool IsAvailExt = false; + + if (MO.isGlobal()) { + const GlobalValue *GValue = MO.getGlobal(); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? 
+ GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + IsExternal = GVar && !GVar->hasInitializer(); + IsCommon = GVar && RealGValue->hasCommonLinkage(); + IsFunction = !GVar; + IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage(); + } else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isJTI()) + MOSymbol = GetJTISymbol(MO.getIndex()); + + if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI()) + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + + const MCExpr *Exp = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_HA, + OutContext); + TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case PPC::LDtocL: { + // Transform %Xd = LDtocL <ga:@sym>, %Xs + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + + // Change the opcode to LD. If the global address is external, has + // common linkage, or is a jump table address, then reference the + // associated TOC entry. Otherwise reference the symbol directly. + TmpInst.setOpcode(PPC::LD); + const MachineOperand &MO = MI->getOperand(1); + assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) && + "Invalid operand for LDtocL!"); + MCSymbol *MOSymbol = 0; + + if (MO.isJTI()) + MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); + else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + else if (MO.isGlobal()) { + const GlobalValue *GValue = MO.getGlobal(); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? 
+ GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage()) + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + } + + const MCExpr *Exp = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO, + OutContext); + TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case PPC::ADDItocL: { + // Transform %Xd = ADDItocL %Xs, <ga:@sym> + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + + // Change the opcode to ADDI8. If the global address is external, then + // generate a TOC entry and reference that. Otherwise reference the + // symbol directly. + TmpInst.setOpcode(PPC::ADDI8); + const MachineOperand &MO = MI->getOperand(2); + assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); + MCSymbol *MOSymbol = 0; + bool IsExternal = false; + bool IsFunction = false; + + if (MO.isGlobal()) { + const GlobalValue *GValue = MO.getGlobal(); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? 
+ GAlias->resolveAliasedGlobal(false) : GValue; + MOSymbol = Mang->getSymbol(RealGValue); + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + IsExternal = GVar && !GVar->hasInitializer(); + IsFunction = !GVar; + } else if (MO.isCPI()) + MOSymbol = GetCPISymbol(MO.getIndex()); + + if (IsFunction || IsExternal) + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + + const MCExpr *Exp = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO, + OutContext); + TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case PPC::ADDISgotTprelHA: { + // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTprel)); + return; + } + case PPC::LDgotTprelL: { + // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs + LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); + + // Change the opcode to LD. 
+ TmpInst.setOpcode(PPC::LD); + const MachineOperand &MO = MI->getOperand(1); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *Exp = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO, + OutContext); + TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case PPC::ADDIStlsgdHA: { + // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsGD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTlsGD)); + return; + } + case PPC::ADDItlsgdL: { + // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym> + // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsGD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsGD)); + return; + } + case PPC::GETtlsADDR: { + // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> + // Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd) + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + const MCSymbolRefExpr *TlsRef = + MCSymbolRefExpr::Create(TlsGetAddr, 
MCSymbolRefExpr::VK_None, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymVar = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD) + .addExpr(TlsRef) + .addExpr(SymVar)); + return; + } + case PPC::ADDIStlsldHA: { + // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym> + // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsLD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X2) + .addExpr(SymGotTlsLD)); + return; + } + case PPC::ADDItlsldL: { + // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym> + // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymGotTlsLD = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymGotTlsLD)); + return; + } + case PPC::GETtlsldADDR: { + // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> + // Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld) + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + + StringRef Name = "__tls_get_addr"; + MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); + const MCSymbolRefExpr *TlsRef = + 
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymVar = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD) + .addExpr(TlsRef) + .addExpr(SymVar)); + return; + } + case PPC::ADDISdtprelHA: { + // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> + // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymDtprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8) + .addReg(MI->getOperand(0).getReg()) + .addReg(PPC::X3) + .addExpr(SymDtprel)); + return; + } + case PPC::ADDIdtprelL: { + // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym> + // Into: %Xd = ADDI8 %Xs, sym@dtprel@l + assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); + const MachineOperand &MO = MI->getOperand(2); + const GlobalValue *GValue = MO.getGlobal(); + MCSymbol *MOSymbol = Mang->getSymbol(GValue); + const MCExpr *SymDtprel = + MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(SymDtprel)); + return; + } case PPC::MFCRpseud: case PPC::MFCR8pseud: // Transform: %R3 = MFCRpseud %CR7 // Into: %R3 = MFCR ;; cr7 OutStreamer.AddComment(PPCInstPrinter:: getRegisterName(MI->getOperand(1).getReg())); - TmpInst.setOpcode(Subtarget.isPPC64() ? 
PPC::MFCR8 : PPC::MFCR); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - OutStreamer.EmitInstruction(TmpInst); + OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR) + .addReg(MI->getOperand(0).getReg())); return; case PPC::SYNC: // In Book E sync is called msync, handle this special case here... @@ -438,14 +733,14 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function // entry point. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext), - 8/*size*/, 0/*addrspace*/); + 8 /*size*/); MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.")); // Generates a R_PPC64_TOC relocation for TOC base insertion. OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, MCSymbolRefExpr::VK_PPC_TOC, OutContext), - 8/*size*/, 0/*addrspace*/); + 8/*size*/); // Emit a null environment pointer. - OutStreamer.EmitIntValue(0, 8 /* size */, 0 /* addrspace */); + OutStreamer.EmitIntValue(0, 8 /* size */); OutStreamer.SwitchSection(Current); MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol( @@ -474,6 +769,25 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) { } } + MachineModuleInfoELF &MMIELF = + MMI->getObjFileInfo<MachineModuleInfoELF>(); + + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(), + OutContext), + isPPC64 ? 
8 : 4/*size*/, 0/*addrspace*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + return AsmPrinter::doFinalization(M); } @@ -508,7 +822,12 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppcA2", "ppce500mc", "ppce5500", + "power3", + "power4", + "power5", + "power5x", "power6", + "power6x", "power7", "ppc64" }; @@ -523,8 +842,11 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { assert(Directive <= PPC::DIR_64 && "Directive out of range."); // FIXME: This is a total hack, finish mc'izing the PPC backend. - if (OutStreamer.hasRawTextSupport()) + if (OutStreamer.hasRawTextSupport()) { + assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) && + "CPUDirectives[] might not be up-to-date!"); OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive])); + } // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. @@ -549,16 +871,13 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) { // Remove $stub suffix, add $lazy_ptr. - SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5); - TmpStr += "$lazy_ptr"; - return Ctx.GetOrCreateSymbol(TmpStr.str()); + StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5); + return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr"); } static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { // Add $tmp suffix to $stub, yielding $stub$tmp. - SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()); - TmpStr += "$tmp"; - return Ctx.GetOrCreateSymbol(TmpStr.str()); + return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp"); } void PPCDarwinAsmPrinter:: @@ -589,32 +908,51 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer.EmitLabel(Stub); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - // FIXME: MCize this. 
- OutStreamer.EmitRawText(StringRef("\tmflr r0")); - OutStreamer.EmitRawText("\tbcl 20,31," + Twine(AnonSymbol->getName())); + + const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext); + + // mflr r0 + OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); + // bcl 20, 31, AnonSymbol + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCLalways).addExpr(Anon)); OutStreamer.EmitLabel(AnonSymbol); - OutStreamer.EmitRawText(StringRef("\tmflr r11")); - OutStreamer.EmitRawText("\taddis r11,r11,ha16("+Twine(LazyPtr->getName())+ - "-" + AnonSymbol->getName() + ")"); - OutStreamer.EmitRawText(StringRef("\tmtlr r0")); - - if (isPPC64) - OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) + - "-" + AnonSymbol->getName() + ")(r11)"); - else - OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) + - "-" + AnonSymbol->getName() + ")(r11)"); - OutStreamer.EmitRawText(StringRef("\tmtctr r12")); - OutStreamer.EmitRawText(StringRef("\tbctr")); - + // mflr r11 + OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); + // addis r11, r11, ha16(LazyPtr - AnonSymbol) + const MCExpr *Sub = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext), + Anon, OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS) + .addReg(PPC::R11) + .addReg(PPC::R11) + .addExpr(Sub)); + // mtlr r0 + OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0)); + + // ldu r12, lo16(LazyPtr - AnonSymbol)(r11) + // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11) + OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? 
PPC::LDU : PPC::LWZU) + .addReg(PPC::R12) + .addExpr(Sub).addExpr(Sub) + .addReg(PPC::R11)); + // mtctr r12 + OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); + // bctr + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR)); + OutStreamer.SwitchSection(LSPSection); OutStreamer.EmitLabel(LazyPtr); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - - if (isPPC64) - OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper")); - else - OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper")); + + MCSymbol *DyldStubBindingHelper = + OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper")); + if (isPPC64) { + // .quad dyld_stub_binding_helper + OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8); + } else { + // .long dyld_stub_binding_helper + OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4); + } } OutStreamer.AddBlankLine(); return; @@ -634,23 +972,42 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { EmitAlignment(4); OutStreamer.EmitLabel(Stub); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - OutStreamer.EmitRawText("\tlis r11,ha16(" + Twine(LazyPtr->getName()) +")"); - if (isPPC64) - OutStreamer.EmitRawText("\tldu r12,lo16(" + Twine(LazyPtr->getName()) + - ")(r11)"); - else - OutStreamer.EmitRawText("\tlwzu r12,lo16(" + Twine(LazyPtr->getName()) + - ")(r11)"); - OutStreamer.EmitRawText(StringRef("\tmtctr r12")); - OutStreamer.EmitRawText(StringRef("\tbctr")); + // lis r11, ha16(LazyPtr) + const MCExpr *LazyPtrHa16 = + MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16, + OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS) + .addReg(PPC::R11) + .addExpr(LazyPtrHa16)); + + const MCExpr *LazyPtrLo16 = + MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16, + OutContext); + // ldu r12, lo16(LazyPtr)(r11) + // lwzu r12, lo16(LazyPtr)(r11) + OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? 
PPC::LDU : PPC::LWZU) + .addReg(PPC::R12) + .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16) + .addReg(PPC::R11)); + + // mtctr r12 + OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12)); + // bctr + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCTR)); + OutStreamer.SwitchSection(LSPSection); OutStreamer.EmitLabel(LazyPtr); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); - - if (isPPC64) - OutStreamer.EmitRawText(StringRef("\t.quad dyld_stub_binding_helper")); - else - OutStreamer.EmitRawText(StringRef("\t.long dyld_stub_binding_helper")); + + MCSymbol *DyldStubBindingHelper = + OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper")); + if (isPPC64) { + // .quad dyld_stub_binding_helper + OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8); + } else { + // .long dyld_stub_binding_helper + OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4); + } } OutStreamer.AddBlankLine(); @@ -703,7 +1060,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/); else // Internal to current translation unit. // @@ -713,7 +1070,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { // fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 8 : 4/*size*/); } Stubs.clear(); @@ -732,7 +1089,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), OutContext), - isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/); + isPPC64 ? 
8 : 4/*size*/); } Stubs.clear(); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp index 21a0fb2..bd1c378 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -17,21 +17,27 @@ #define DEBUG_TYPE "ppc-branch-select" #include "PPC.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" -#include "MCTargetDesc/PPCPredicates.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; STATISTIC(NumExpanded, "Number of branches expanded to long format"); +namespace llvm { + void initializePPCBSelPass(PassRegistry&); +} + namespace { struct PPCBSel : public MachineFunctionPass { static char ID; - PPCBSel() : MachineFunctionPass(ID) {} + PPCBSel() : MachineFunctionPass(ID) { + initializePPCBSelPass(*PassRegistry::getPassRegistry()); + } /// BlockSizes - The sizes of the basic blocks in the function. 
std::vector<unsigned> BlockSizes; @@ -45,6 +51,9 @@ namespace { char PPCBSel::ID = 0; } +INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector", + false, false) + /// createPPCBranchSelectionPass - returns an instance of the Branch Selection /// Pass /// diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index 2a2abb1..81a54d7 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -31,20 +31,20 @@ #define DEBUG_TYPE "ctrloops" #include "PPC.h" -#include "PPCTargetMachine.h" #include "MCTargetDesc/PPCPredicates.h" -#include "llvm/Constants.h" -#include "llvm/PassSupport.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Constants.h" +#include "llvm/PassSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -54,6 +54,10 @@ using namespace llvm; STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); +namespace llvm { + void initializePPCCTRLoopsPass(PassRegistry&); +} + namespace { class CountValue; struct PPCCTRLoops : public MachineFunctionPass { @@ -64,7 +68,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - PPCCTRLoops() : MachineFunctionPass(ID) {} + PPCCTRLoops() : MachineFunctionPass(ID) { + initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -174,15 +180,32 @@ namespace { }; } // end anonymous 
namespace +INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", + false, false) /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { +static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp, + bool &Int64Cmp) { + if (MI->getOpcode() == PPC::CMPWI) { SignedCmp = true; + Int64Cmp = false; + return true; + } else if (MI->getOpcode() == PPC::CMPDI) { + SignedCmp = true; + Int64Cmp = true; + return true; + } else if (MI->getOpcode() == PPC::CMPLWI) { + SignedCmp = false; + Int64Cmp = false; return true; - } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { + } else if (MI->getOpcode() == PPC::CMPLDI) { SignedCmp = false; + Int64Cmp = true; return true; } @@ -341,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); RI != RE; ++RI) { IV_Opnd = &RI.getOperand(); - bool SignedCmp; + bool SignedCmp, Int64Cmp; MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && + if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) && MI->getOperand(0).getReg() == PredReg) { OldInsts.push_back(MI); @@ -368,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, assert(InitialValue->isReg() && "Expecting register for init value"); unsigned InitialValueReg = InitialValue->getReg(); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); + MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); // Here we need to look for an immediate load (an li or lis/ori pair). 
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = (short) DefInstr->getOperand(2).getImm(); - const MachineInstr *DefInstr2 = - MRI->getVRegDef(DefInstr->getOperand(0).getReg()); + int64_t start = DefInstr->getOperand(2).getImm(); + MachineInstr *DefInstr2 = + MRI->getVRegDef(DefInstr->getOperand(1).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); @@ -387,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + OldInsts.push_back(DefInstr2); + + // count/iv_value, the trip count, should be positive here. If it + // is negative, that indicates that the counter will wrap. + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); - int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); + int64_t count = ImmVal - + int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } else if (iv_value == 1 || iv_value == -1) { // We can't determine a constant starting value. if (ImmVal == 0) { @@ -405,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, } // FIXME: handle non-zero end value. } - // FIXME: handle non-unit increments (we might not want to introduce division - // but we can handle some 2^n cases with shifts). 
+ // FIXME: handle non-unit increments (we might not want to introduce + // division but we can handle some 2^n cases with shifts). } } @@ -477,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI, if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (!MRI->use_nodbg_empty(Reg)) { - // This instruction has users, but if the only user is the phi node for the - // parent block, and the only use of that phi node is this instruction, then - // this instruction is dead: both it (and the phi node) can be removed. + // This instruction has users, but if the only user is the phi node for + // the parent block, and the only use of that phi node is this + // instruction, then this instruction is dead: both it (and the phi + // node) can be removed. MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); if (llvm::next(I) == MRI->use_end() && I.getOperand().getParent()->isPHI()) { @@ -582,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { DEBUG(dbgs() << "failed to get trip count!\n"); return false; } + + if (TripCount->isImm()) { + DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n"); + + // FIXME: We currently can't form 64-bit constants + // (including 32-bit unsigned constants) + if (!isInt<32>(TripCount->getImm())) + return false; + } + // Does the loop contain any invalid instructions? if (containsInvalidInstruction(L)) { return false; @@ -635,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { const TargetRegisterClass *SrcRC = MF->getRegInfo().getRegClass(TripCount->getReg()); CountReg = MF->getRegInfo().createVirtualRegister(RC); - unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ? + unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ? (unsigned) PPC::EXTSW_32_64 : (unsigned) TargetOpcode::COPY; BuildMI(*Preheader, InsertPos, dl, @@ -652,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { // Put the trip count in a register for transfer into the count register. 
int64_t CountImm = TripCount->getImm(); - assert(!TripCount->isNeg() && "Constant trip count must be positive"); + if (TripCount->isNeg()) + CountImm = -CountImm; CountReg = MF->getRegInfo().createVirtualRegister(RC); - if (CountImm > 0xFFFF) { + if (abs64(CountImm) > 0x7FFF) { BuildMI(*Preheader, InsertPos, dl, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), - CountReg).addImm(CountImm >> 16); + CountReg).addImm((CountImm >> 16) & 0xFFFF); unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, dl, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td index 3f87e88..c8a29a3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -27,9 +27,10 @@ def RetCC_PPC : CallingConv<[ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, - CCIfType<[f32], CCAssignToReg<[F1]>>, - CCIfType<[f64], CCAssignToReg<[F1, F2]>>, + CCIfType<[f32], CCAssignToReg<[F1, F2]>>, + CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>, // Vector types are always returned in V2. CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>> @@ -37,49 +38,20 @@ def RetCC_PPC : CallingConv<[ //===----------------------------------------------------------------------===// -// PowerPC Argument Calling Conventions -//===----------------------------------------------------------------------===// -/* -def CC_PPC : CallingConv<[ - // The first 8 integer arguments are passed in integer registers. 
- CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, - CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, - - // Common sub-targets passes FP values in F1 - F13 - CCIfType<[f32, f64], - CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>, - - // The first 12 Vector arguments are passed in altivec registers. - CCIfType<[v16i8, v8i16, v4i32, v4f32], - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,V11,V12,V13]>> - -/* - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCAssignToStack<16, 16>>*/ -]>; - -*/ - -//===----------------------------------------------------------------------===// -// PowerPC System V Release 4 ABI +// PowerPC System V Release 4 32-bit ABI //===----------------------------------------------------------------------===// -def CC_PPC_SVR4_Common : CallingConv<[ +def CC_PPC32_SVR4_Common : CallingConv<[ // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. - CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, + CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, // The first 8 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. - CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>, + CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>, // FP values are passed in F1 - F8. 
CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, @@ -100,18 +72,18 @@ def CC_PPC_SVR4_Common : CallingConv<[ // This calling convention puts vector arguments always on the stack. It is used // to assign vector arguments which belong to the variable portion of the // parameter list of a variable argument function. -def CC_PPC_SVR4_VarArg : CallingConv<[ - CCDelegateTo<CC_PPC_SVR4_Common> +def CC_PPC32_SVR4_VarArg : CallingConv<[ + CCDelegateTo<CC_PPC32_SVR4_Common> ]>; -// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put -// vector arguments in vector registers before putting them on the stack. -def CC_PPC_SVR4 : CallingConv<[ +// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to +// put vector arguments in vector registers before putting them on the stack. +def CC_PPC32_SVR4 : CallingConv<[ // The first 12 Vector arguments are passed in AltiVec registers. CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, - CCDelegateTo<CC_PPC_SVR4_Common> + CCDelegateTo<CC_PPC32_SVR4_Common> ]>; // Helper "calling convention" to handle aggregate by value arguments. @@ -122,15 +94,15 @@ def CC_PPC_SVR4 : CallingConv<[ // Still, the address of the aggregate copy in the callers stack frame is passed // in a GPR (or in the parameter list area if all GPRs are allocated) from the // caller to the callee. The location for the address argument is assigned by -// the CC_PPC_SVR4 calling convention. +// the CC_PPC32_SVR4 calling convention. // -// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are +// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are // not passed by value. 
-def CC_PPC_SVR4_ByVal : CallingConv<[ +def CC_PPC32_SVR4_ByVal : CallingConv<[ CCIfByVal<CCPassByVal<4, 4>>, - CCCustom<"CC_PPC_SVR4_Custom_Dummy"> + CCCustom<"CC_PPC32_SVR4_Custom_Dummy"> ]>; def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, @@ -164,3 +136,9 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV F27, F28, F29, F30, F31, CR2, CR3, CR4, V20, V21, V22, V23, V24, V25, V26, V27, V28, V29, V30, V31)>; + +def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>; +def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>; + +def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>; + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp index 252a2d1..6478718 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -12,15 +12,15 @@ // //===----------------------------------------------------------------------===// -#include "PPCTargetMachine.h" -#include "PPCRelocations.h" #include "PPC.h" -#include "llvm/Module.h" -#include "llvm/PassManager.h" +#include "PPCRelocations.h" +#include "PPCTargetMachine.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/PassManager.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" @@ -68,6 +68,7 @@ namespace { unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; const char *getPassName() const { return "PowerPC Machine Code Emitter"; } @@ -141,7 +142,7 @@ unsigned 
PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 || MI.getOpcode() == PPC::MFOCRF) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> getPPCRegisterNumbering(MO.getReg()); + return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg()); } MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, @@ -243,6 +244,13 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI, } +unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("TLS not supported on the old JIT."); + return 0; +} + + unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) const { @@ -252,7 +260,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return getPPCRegisterNumbering(MO.getReg()); + return TM.getRegisterInfo()->getEncodingValue(MO.getReg()); } assert(MO.isImm() && diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index caf7bf2..3244b90 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// #include "PPCFrameLowering.h" -#include "PPCInstrInfo.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" -#include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include 
"llvm/IR/Function.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -103,6 +103,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) { // transform this into the appropriate ORI instruction. static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { MachineFunction *MF = MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); DebugLoc dl = MI->getDebugLoc(); unsigned UsedRegMask = 0; @@ -115,16 +116,25 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { for (MachineRegisterInfo::livein_iterator I = MF->getRegInfo().livein_begin(), E = MF->getRegInfo().livein_end(); I != E; ++I) { - unsigned RegNo = getPPCRegisterNumbering(I->first); + unsigned RegNo = TRI->getEncodingValue(I->first); if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - unsigned RegNo = getPPCRegisterNumbering(*I); - if (VRRegNo[RegNo] == *I) // If this really is a vector reg. - UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. + + // Live out registers appear as use operands on return instructions. + for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); + UsedRegMask != 0 && BI != BE; ++BI) { + const MachineBasicBlock &MBB = *BI; + if (MBB.empty() || !MBB.back().isReturn()) + continue; + const MachineInstr &Ret = MBB.back(); + for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { + const MachineOperand &MO = Ret.getOperand(I); + if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) + continue; + unsigned RegNo = TRI->getEncodingValue(MO.getReg()); + UsedRegMask &= ~(1 << (31-RegNo)); + } } // If no registers are used, turn this into a copy. 
@@ -179,13 +189,31 @@ static bool spillsCR(const MachineFunction &MF) { return FuncInfo->isCRSpilled(); } +static bool spillsVRSAVE(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->isVRSAVESpilled(); +} + +static bool hasSpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasSpills(); +} + +static bool hasNonRISpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasNonRISpills(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { +unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, + bool UpdateMF, + bool UseEstimate) const { MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); + unsigned FrameSize = + UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); // Get the alignments provided by the target, and the maximum alignment // (if any) of the fixed frame objects. @@ -198,13 +226,14 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { // to adjust the stack pointer (we fit in the Red Zone). For 64-bit // SVR4, we also require a stack frame if we need to spill the CR, // since this spill area is addressed relative to the stack pointer. - bool DisableRedZone = MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::NoRedZone); - // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. However, it can - // still generate stackless code if all local vars are reg-allocated. - // Try: (FrameSize <= 224 - // || (FrameSize == 0 && Subtarget.isPPC32 && Subtarget.isSVR4ABI())) + // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate + // stackless code if all local vars are reg-allocated. 
+ bool DisableRedZone = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone); if (!DisableRedZone && + (Subtarget.isPPC64() || // 32-bit SVR4, no stack- + !Subtarget.isSVR4ABI() || // allocated locals. + FrameSize == 0) && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. @@ -213,8 +242,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame - MFI->setStackSize(0); - return; + if (UpdateMF) + MFI->setStackSize(0); + return 0; } // Get the maximum call frame size of all the calls. @@ -231,7 +261,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); + if (UpdateMF) + MFI->setMaxCallFrameSize(maxCallFrameSize); // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -240,7 +271,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { FrameSize = (FrameSize + AlignMask) & ~AlignMask; // Update frame info. - MFI->setStackSize(FrameSize); + if (UpdateMF) + MFI->setStackSize(FrameSize); + + return FrameSize; } // hasFP - Return true if the specified function actually has a dedicated frame @@ -261,7 +295,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { // Naked functions have no stack frame pushed, so we don't have a frame // pointer. 
- if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) + if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked)) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || @@ -270,6 +305,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } +void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { + bool is31 = needsFP(MF); + unsigned FPReg = is31 ? PPC::R31 : PPC::R1; + unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) + for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { + --MBBI; + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { + MachineOperand &MO = MBBI->getOperand(I); + if (!MO.isReg()) + continue; + + switch (MO.getReg()) { + case PPC::FP: + MO.setReg(FPReg); + break; + case PPC::FP8: + MO.setReg(FP8Reg); + break; + } + } + } +} void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB @@ -300,13 +360,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI = MBB.begin(); // Work out frame sizes. - // FIXME: determineFrameLayout() may change the frame size. This should be - // moved upper, to some hook. - determineFrameLayout(MF); - unsigned FrameSize = MFI->getStackSize(); - + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; + if (MFI->isFrameAddressTaken()) + replaceFPWithRealFP(MF); + // Get processor type. 
bool isPPC64 = Subtarget.isPPC64(); // Get operating system @@ -769,14 +828,15 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); // Save and clear the LR state. PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned LR = RegInfo->getRARegister(); FI->setMustSaveLR(MustSaveLR(MF, LR)); - MF.getRegInfo().setPhysRegUnused(LR); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.setPhysRegUnused(LR); // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); @@ -801,29 +861,24 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } - // Reserve a slot closest to SP or frame pointer if we have a dynalloc or - // a large stack, which will require scavenging a register to materialize a - // large offset. - // FIXME: this doesn't actually check stack size, so is a bit pessimistic - // FIXME: doesn't detect whether or not we need to spill vXX, which requires - // r0 for now. - - if (RegInfo->requiresRegisterScavenging(MF)) - if (needsFP(MF) || spillsCR(MF)) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } + // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the + // function uses CR 2, 3, or 4. 
+ if (!isPPC64 && !isDarwinABI && + (MRI.isPhysRegUsed(PPC::CR2) || + MRI.isPhysRegUsed(PPC::CR3) || + MRI.isPhysRegUsed(PPC::CR4))) { + int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); + FI->setCRSpillFrameIndex(FrameIdx); + } } -void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) + if (!Subtarget.isSVR4ABI()) { + addScavengingSpillSlot(MF, RS); return; + } // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -831,6 +886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Early exit if no callee saved registers are modified! if (CSI.empty() && !needsFP(MF)) { + addScavengingSpillSlot(MF, RS); return; } @@ -895,6 +951,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); int64_t LowerBound = 0; @@ -914,7 +971,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } - LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8; + LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; } // Check whether the frame pointer register is allocated. 
If so, make sure it @@ -948,8 +1005,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } unsigned MinReg = - std::min<unsigned>(getPPCRegisterNumbering(MinGPR), - getPPCRegisterNumbering(MinG8R)); + std::min<unsigned>(TRI->getEncodingValue(MinGPR), + TRI->getEncodingValue(MinG8R)); if (Subtarget.isPPC64()) { LowerBound -= (31 - MinReg + 1) * 8; @@ -1009,6 +1066,44 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } + + addScavengingSpillSlot(MF, RS); +} + +void +PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, + RegScavenger *RS) const { + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or + // a large stack, which will require scavenging a register to materialize a + // large offset. + + // We need to have a scavenger spill slot for spills if the frame size is + // large. In case there is no free register for large-offset addressing, + // this slot is used for the necessary emergency spill. Also, we need the + // slot for dynamic stack allocations. + + // The scavenger might be invoked if the frame offset does not fit into + // the 16-bit immediate. We don't know the complete frame size here + // because we've not yet computed callee-saved register spills or the + // needed alignment padding. + unsigned StackSize = determineFrameLayout(MF, false, true); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || + hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + + // These kinds of spills might need two registers. 
+ if (spillsCR(MF) || spillsVRSAVE(MF)) + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + + } } bool @@ -1046,8 +1141,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // save slot via GPR12 (available in the prolog for 32- and 64-bit). if (Subtarget.isPPC64()) { // 64-bit: SP+8 - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12)); - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW)) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12)); + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8)) .addReg(PPC::X12, getKillRegState(true)) .addImm(8) @@ -1087,7 +1182,7 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, if (isPPC64) { // 64-bit: SP+8 - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12) .addImm(8) .addReg(PPC::X1)); RestoreOp = PPC::MTCRF8; @@ -1103,15 +1198,56 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, if (CR2Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) - .addReg(MoveReg)); + .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); if (CR3Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) - .addReg(MoveReg)); + .addReg(MoveReg, getKillRegState(!CR4Spilled))); if (CR4Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) - .addReg(MoveReg)); + .addReg(MoveReg, getKillRegState(true))); +} + +void PPCFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const PPCInstrInfo &TII = + *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo()); + if (MF.getTarget().Options.GuaranteedTailCallOpt && + I->getOpcode() == PPC::ADJCALLSTACKUP) { + // Add (actually subtract) back the amount the callee popped on return. 
+ if (int CalleeAmt = I->getOperand(1).getImm()) { + bool is64Bit = Subtarget.isPPC64(); + CalleeAmt *= -1; + unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; + unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; + unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; + unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; + unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; + MachineInstr *MI = I; + DebugLoc dl = MI->getDebugLoc(); + + if (isInt<16>(CalleeAmt)) { + BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) + .addReg(StackReg, RegState::Kill) + .addImm(CalleeAmt); + } else { + MachineBasicBlock::iterator MBBI = I; + BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) + .addImm(CalleeAmt >> 16); + BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) + .addReg(TmpReg, RegState::Kill) + .addImm(CalleeAmt & 0xFFFF); + BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) + .addReg(StackReg, RegState::Kill) + .addReg(TmpReg); + } + } + } + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); } bool diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h index 4d957b9..6f5f936 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h @@ -15,9 +15,9 @@ #include "PPC.h" #include "PPCSubtarget.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/STLExtras.h" namespace llvm { class PPCSubtarget; @@ -27,11 +27,14 @@ class PPCFrameLowering: public TargetFrameLowering { public: PPCFrameLowering(const PPCSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0), + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, + (sti.hasQPX() || sti.isBGQ()) ? 
32 : 16, 0), Subtarget(sti) { } - void determineFrameLayout(MachineFunction &MF) const; + unsigned determineFrameLayout(MachineFunction &MF, + bool UpdateMF = true, + bool UseEstimate = false) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. @@ -40,16 +43,23 @@ public: bool hasFP(const MachineFunction &MF) const; bool needsFP(const MachineFunction &MF) const; + void replaceFPWithRealFP(MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; + void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, @@ -139,6 +149,9 @@ public: return 0; } + // Note that the offsets here overlap, but this is fixed up in + // processFunctionBeforeFrameFinalized. + static const SpillSlot Offsets[] = { // Floating-point register save area offsets. {PPC::F31, -8}, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 6ed1fb9..4bf1e33 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -179,7 +179,7 @@ getHazardType(SUnit *SU, int Stalls) { } // Do not allow MTCTR and BCTRL to be in the same dispatch group. 
- if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4)) + if (HasCTRSet && Opcode == PPC::BCTRL) return NoopHazard; // If this is a load following a store, make sure it's not to the same or diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 254fea6..95efc11 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -14,24 +14,30 @@ #define DEBUG_TYPE "ppc-codegen" #include "PPC.h" -#include "PPCTargetMachine.h" #include "MCTargetDesc/PPCPredicates.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" -#include "llvm/Intrinsics.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; +namespace llvm { + void initializePPCDAGToDAGISelPass(PassRegistry&); +} + namespace { //===--------------------------------------------------------------------===// /// PPCDAGToDAGISel - PPC specific code to select PPC machine @@ -46,7 +52,9 @@ namespace { explicit PPCDAGToDAGISel(PPCTargetMachine &tm) : SelectionDAGISel(tm), TM(tm), PPCLowering(*TM.getTargetLowering()), - PPCSubTarget(*TM.getSubtargetImpl()) {} + PPCSubTarget(*TM.getSubtargetImpl()) { + 
initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); + } virtual bool runOnMachineFunction(MachineFunction &MF) { // Make sure we re-emit a set of the global base reg if necessary @@ -59,6 +67,8 @@ namespace { return true; } + virtual void PostprocessISelDAG(); + /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm) { @@ -110,10 +120,10 @@ namespace { } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc - /// immediate field. Because preinc imms have already been validated, just - /// accept it. + /// immediate field. Note that the operand at this point is already the + /// result of a prior SelectAddressRegImm call. bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { - if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || + if (N.getOpcode() == ISD::TargetConstant || N.getOpcode() == ISD::TargetGlobalAddress) { Out = N; return true; @@ -122,18 +132,6 @@ namespace { return false; } - /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc - /// index field. Because preinc imms have already been validated, just - /// accept it. - bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const { - if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || - N.getOpcode() == ISD::TargetGlobalAddress) - return false; - - Out = N; - return true; - } - /// SelectAddrIdx - Given the specified addressed, check to see if it can be /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. @@ -154,6 +152,12 @@ namespace { return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); } + // Select an address into a single register. + bool SelectAddr(SDValue N, SDValue &Base) { + Base = N; + return true; + } + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. 
The case of adding a (possibly relocatable) constant to a @@ -1040,7 +1044,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { break; SDValue Offset = LD->getOffset(); - if (isa<ConstantSDNode>(Offset) || + if (Offset.getOpcode() == ISD::TargetConstant || Offset.getOpcode() == ISD::TargetGlobalAddress) { unsigned Opcode; @@ -1107,7 +1111,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); - SDValue Ops[] = { Offset, Base, Chain }; + SDValue Ops[] = { Base, Offset, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), MVT::Other, Ops, 3); @@ -1268,11 +1272,277 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { Chain), 0); return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } + case PPCISD::TOC_ENTRY: { + assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); + + // For medium and large code model, we generate two instructions as + // described below. Otherwise we allow SelectCodeCommon to handle this, + // selecting one of LDtoc, LDtocJTI, and LDtocCPT. + CodeModel::Model CModel = TM.getCodeModel(); + if (CModel != CodeModel::Medium && CModel != CodeModel::Large) + break; + + // The first source operand is a TargetGlobalAddress or a + // TargetJumpTable. 
If it is an externally defined symbol, a symbol + // with common linkage, a function address, or a jump table address, + // or if we are generating code for large code model, we generate: + // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) + // Otherwise we generate: + // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) + SDValue GA = N->getOperand(0); + SDValue TOCbase = N->getOperand(1); + SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, + TOCbase, GA); + + if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large) + return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, + SDValue(Tmp, 0)); + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { + const GlobalValue *GValue = G->getGlobal(); + const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue); + const GlobalValue *RealGValue = GAlias ? + GAlias->resolveAliasedGlobal(false) : GValue; + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue); + assert((GVar || isa<Function>(RealGValue)) && + "Unexpected global value subclass!"); + + // An external variable is one without an initializer. For these, + // for variables with common linkage, and for Functions, generate + // the LDtocL form. + if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || + RealGValue->hasAvailableExternallyLinkage()) + return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, + SDValue(Tmp, 0)); + } + + return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, + SDValue(Tmp, 0), GA); + } + case PPCISD::VADD_SPLAT: { + // This expands into one of three sequences, depending on whether + // the first operand is odd or even, positive or negative. 
+ assert(isa<ConstantSDNode>(N->getOperand(0)) && + isa<ConstantSDNode>(N->getOperand(1)) && + "Invalid operand on VADD_SPLAT!"); + + int Elt = N->getConstantOperandVal(0); + int EltSize = N->getConstantOperandVal(1); + unsigned Opc1, Opc2, Opc3; + EVT VT; + + if (EltSize == 1) { + Opc1 = PPC::VSPLTISB; + Opc2 = PPC::VADDUBM; + Opc3 = PPC::VSUBUBM; + VT = MVT::v16i8; + } else if (EltSize == 2) { + Opc1 = PPC::VSPLTISH; + Opc2 = PPC::VADDUHM; + Opc3 = PPC::VSUBUHM; + VT = MVT::v8i16; + } else { + assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); + Opc1 = PPC::VSPLTISW; + Opc2 = PPC::VADDUWM; + Opc3 = PPC::VSUBUWM; + VT = MVT::v4i32; + } + + if ((Elt & 1) == 0) { + // Elt is even, in the range [-32,-18] + [16,30]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp = VSPLTIS[BHW] elt + // VADDU[BHW]M tmp, tmp + // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 + SDValue EltVal = getI32Imm(Elt >> 1); + SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + SDValue TmpVal = SDValue(Tmp, 0); + return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal); + + } else if (Elt > 0) { + // Elt is odd and positive, in the range [17,31]. + // + // Convert: VADD_SPLAT elt, size + // Into: tmp1 = VSPLTIS[BHW] elt-16 + // tmp2 = VSPLTIS[BHW] -16 + // VSUBU[BHW]M tmp1, tmp2 + SDValue EltVal = getI32Imm(Elt - 16); + SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + EltVal = getI32Imm(-16); + SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + + } else { + // Elt is odd and negative, in the range [-31,-17]. 
+ // + // Convert: VADD_SPLAT elt, size + // Into: tmp1 = VSPLTIS[BHW] elt+16 + // tmp2 = VSPLTIS[BHW] -16 + // VADDU[BHW]M tmp1, tmp2 + SDValue EltVal = getI32Imm(Elt + 16); + SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + EltVal = getI32Imm(-16); + SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); + return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), + SDValue(Tmp2, 0)); + } + } } return SelectCode(N); } +/// PostProcessISelDAG - Perform some late peephole optimizations +/// on the DAG representation. +void PPCDAGToDAGISel::PostprocessISelDAG() { + + // Skip peepholes at -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + + // These optimizations are currently supported only for 64-bit SVR4. + if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) + return; + + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = --Position; + // Skip dead nodes and any non-machine opcodes. + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + unsigned FirstOp; + unsigned StorageOpcode = N->getMachineOpcode(); + + switch (StorageOpcode) { + default: continue; + + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LD: + case PPC::LFD: + case PPC::LFS: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWA: + case PPC::LWZ: + case PPC::LWZ8: + FirstOp = 0; + break; + + case PPC::STB: + case PPC::STB8: + case PPC::STD: + case PPC::STFD: + case PPC::STFS: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + FirstOp = 1; + break; + } + + // If this is a load or store with a zero offset, we may be able to + // fold an add-immediate into the memory operation. 
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) || + N->getConstantOperandVal(FirstOp) != 0) + continue; + + SDValue Base = N->getOperand(FirstOp + 1); + if (!Base.isMachineOpcode()) + continue; + + unsigned Flags = 0; + bool ReplaceFlags = true; + + // When the feeding operation is an add-immediate of some sort, + // determine whether we need to add relocation information to the + // target flags on the immediate operand when we fold it into the + // load instruction. + // + // For something like ADDItocL, the relocation information is + // inferred from the opcode; when we process it in the AsmPrinter, + // we add the necessary relocation there. A load, though, can receive + // relocation from various flavors of ADDIxxx, so we need to carry + // the relocation information in the target flags. + switch (Base.getMachineOpcode()) { + default: continue; + + case PPC::ADDI8: + case PPC::ADDI: + // In some cases (such as TLS) the relocation information + // is already in place on the operand, so copying the operand + // is sufficient. + ReplaceFlags = false; + // For these cases, the immediate may not be divisible by 4, in + // which case the fold is illegal for DS-form instructions. (The + // other cases provide aligned addresses and are always safe.) + if ((StorageOpcode == PPC::LWA || + StorageOpcode == PPC::LD || + StorageOpcode == PPC::STD) && + (!isa<ConstantSDNode>(Base.getOperand(1)) || + Base.getConstantOperandVal(1) % 4 != 0)) + continue; + break; + case PPC::ADDIdtprelL: + Flags = PPCII::MO_DTPREL16_LO; + break; + case PPC::ADDItlsldL: + Flags = PPCII::MO_TLSLD16_LO; + break; + case PPC::ADDItocL: + Flags = PPCII::MO_TOC16_LO; + break; + } + + // We found an opportunity. Reverse the operands from the add + // immediate and substitute them into the load or store. If + // needed, update the target flags for the immediate operand to + // reflect the necessary relocation information. 
+ DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + DEBUG(Base->dump(CurDAG)); + DEBUG(dbgs() << "\nN: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + SDValue ImmOpnd = Base.getOperand(1); + + // If the relocation information isn't already present on the + // immediate operand, add it now. + if (ReplaceFlags) { + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { + DebugLoc dl = GA->getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); + } else if (ConstantPoolSDNode *CP = + dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { + const Constant *C = CP->getConstVal(); + ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, + CP->getAlignment(), + 0, Flags); + } + } + + if (FirstOp == 1) // Store + (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, + Base.getOperand(0), N->getOperand(3)); + else // Load + (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), + N->getOperand(2)); + + // The add-immediate may now be dead, in which case remove it. 
+ if (Base.getNode()->use_empty()) + CurDAG->RemoveDeadNode(Base.getNode()); + } +} /// createPPCISelDag - This pass converts a legalized DAG into a @@ -1282,3 +1552,14 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { return new PPCDAGToDAGISel(TM); } +static void initializePassOnce(PassRegistry &Registry) { + const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection"; + PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0, + false, false); + Registry.registerPass(*PI, true); +} + +void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) { + CALL_ONCE_INITIALIZATION(initializePassOnce); +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index adf78d5..16fc8a0 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12,15 +12,10 @@ //===----------------------------------------------------------------------===// #include "PPCISelLowering.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" -#include "MCTargetDesc/PPCPredicates.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -29,6 +24,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -36,20 +36,20 @@ #include "llvm/Target/TargetOptions.h" using 
namespace llvm; -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State); -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, +static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); +static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); +static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", +cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); + static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); @@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); + PPCRegInfo = TM.getRegisterInfo(); setPow2DivIsCheap(); @@ -112,6 +116,7 @@ 
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); + setOperationAction(ISD::FREM, MVT::ppcf128, Expand); // PowerPC has no SREM/UREM instructions setOperationAction(ISD::SREM, MVT::i32, Expand); @@ -132,11 +137,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); setOperationAction(ISD::FMA , MVT::f32, Legal); @@ -144,26 +151,58 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root - if (!Subtarget->hasFSQRT()) { + if (!Subtarget->hasFSQRT() && + !(TM.Options.UnsafeFPMath && + Subtarget->hasFRSQRTE() && Subtarget->hasFRE())) setOperationAction(ISD::FSQRT, MVT::f64, Expand); + + if (!Subtarget->hasFSQRT() && + !(TM.Options.UnsafeFPMath && + Subtarget->hasFRSQRTES() && Subtarget->hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); - } setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + if (Subtarget->hasFPRND()) { + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + + setOperationAction(ISD::FFLOOR, 
MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + + // frin does not implement "ties to even." Thus, this is safe only in + // fast-math mode. + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + + // These need to set FE_INEXACT, and use a custom inserter. + setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + } + } + // PowerPC does not have BSWAP, CTPOP or CTTZ setOperationAction(ISD::BSWAP, MVT::i32 , Expand); - setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i64 , Expand); - setOperationAction(ISD::CTPOP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); + if (Subtarget->hasPOPCNTD()) { + setOperationAction(ISD::CTPOP, MVT::i32 , Legal); + setOperationAction(ISD::CTPOP, MVT::i64 , Legal); + } else { + setOperationAction(ISD::CTPOP, MVT::i32 , Expand); + setOperationAction(ISD::CTPOP, MVT::i64 , Expand); + } + // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); @@ -206,6 +245,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support + // SjLj exception handling but a light-weight setjmp/longjmp replacement to + // support continuation, user-level threading, and etc.. 
As a result, no + // other SjLj exception interfaces are implemented and please don't build + // your own exception handling based on them. + // LLVM/Clang supports zero-cost DWARF exception handling. + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. @@ -285,15 +332,28 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot do this with Promote because i64 is not a legal type. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - // FIXME: disable this lowered code. This generates 64-bit register values, - // and we don't model the fact that the top part is clobbered by calls. We - // need to flag these together so that the value isn't live across a call. - //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64()) + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } + // With the instructions enabled under FPCVT, we can do everything. 
+ if (PPCSubTarget.hasFPCVT()) { + if (Subtarget->has64BitSupport()) { + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + } + + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + } + if (Subtarget->use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); @@ -347,6 +407,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); + setOperationAction(ISD::FSQRT, VT, Expand); + setOperationAction(ISD::FLOG, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FEXP, VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FABS, VT, Expand); + setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); + setOperationAction(ISD::FTRUNC, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::BUILD_VECTOR, VT, Expand); @@ -361,6 +436,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + 
setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -373,12 +449,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setLoadExtAction(ISD::EXTLOAD, VT, Expand); } - for (unsigned i = (unsigned)MVT::FIRST_FP_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_FP_VECTOR_VALUETYPE; ++i) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)i; - setOperationAction(ISD::FSQRT, VT, Expand); - } - // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle // with merges, splats, etc. setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); @@ -393,6 +463,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); @@ -401,6 +475,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); + + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + } + setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -429,6 +509,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); + 
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? @@ -449,6 +531,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setTargetDAGCombine(ISD::BR_CC); setTargetDAGCombine(ISD::BSWAP); + // Use reciprocal estimates. + if (TM.Options.UnsafeFPMath) { + setTargetDAGCombine(ISD::FDIV); + setTargetDAGCombine(ISD::FSQRT); + } + // Darwin long double math library functions have $LDBL128 appended. if (Subtarget->isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); @@ -482,15 +570,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // friends. Gcc uses same threshold of 128 bytes (= 32 word stores). if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc || Subtarget->getDarwinDirective() == PPC::DIR_E5500) { - maxStoresPerMemset = 32; - maxStoresPerMemsetOptSize = 16; - maxStoresPerMemcpy = 32; - maxStoresPerMemcpyOptSize = 8; - maxStoresPerMemmove = 32; - maxStoresPerMemmoveOptSize = 8; + MaxStoresPerMemset = 32; + MaxStoresPerMemsetOptSize = 16; + MaxStoresPerMemcpy = 32; + MaxStoresPerMemcpyOptSize = 8; + MaxStoresPerMemmove = 32; + MaxStoresPerMemmoveOptSize = 8; setPrefFunctionAlignment(4); - benefitFromCodePlacementOpt = true; } } @@ -521,6 +608,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; + case PPCISD::FRE: return "PPCISD::FRE"; + case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; @@ -536,16 +625,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SRL: return "PPCISD::SRL"; case PPCISD::SRA: return "PPCISD::SRA"; case PPCISD::SHL: return "PPCISD::SHL"; - 
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; - case PPCISD::STD_32: return "PPCISD::STD_32"; - case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; - case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4"; - case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; - case PPCISD::NOP: return "PPCISD::NOP"; + case PPCISD::CALL: return "PPCISD::CALL"; + case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; - case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; - case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; + case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; + case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; + case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; @@ -555,13 +641,25 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::STCX: return "PPCISD::STCX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::MFFS: return "PPCISD::MFFS"; - case PPCISD::MTFSB0: return "PPCISD::MTFSB0"; - case PPCISD::MTFSB1: return "PPCISD::MTFSB1"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; - case PPCISD::MTFSF: return "PPCISD::MTFSF"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; case PPCISD::CR6SET: return "PPCISD::CR6SET"; case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; + case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA"; + case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L"; + case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L"; + case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; + case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; + case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; + case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; + case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; + case 
PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; + case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; + case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; + case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; + case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; + case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; + case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; } } @@ -995,7 +1093,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1044,7 +1142,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } // Otherwise, do it the hard way, using R0 as the base register. - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; @@ -1107,7 +1205,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? 
PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1145,15 +1243,19 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SelectionDAG &DAG) const { if (DisablePPCPreinc) return false; + bool isLoad = true; SDValue Ptr; EVT VT; + unsigned Alignment; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); - + Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); + Alignment = ST->getAlignment(); + isLoad = false; } else return false; @@ -1161,7 +1263,25 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (VT.isVector()) return false; - if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) { + if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { + + // Common code will reject creating a pre-inc form if the base pointer + // is a frame index, or if N is a store and the base pointer is either + // the same as or a predecessor of the value being stored. Check for + // those situations here, and try with swapped Base/Offset instead. + bool Swap = false; + + if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) + Swap = true; + else if (!isLoad) { + SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) + Swap = true; + } + + if (Swap) + std::swap(Base, Offset); + AM = ISD::PRE_INC; return true; } @@ -1172,6 +1292,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) return false; } else { + // LDU/STU need an address with at least 4-byte alignment. + if (Alignment < 4) + return false; + // reg + imm * 4. 
if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) return false; @@ -1308,19 +1432,81 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, EVT PtrVT = getPointerTy(); bool is64bit = PPCSubTarget.isPPC64(); - TLSModel::Model model = getTargetMachine().getTLSModel(GV); + TLSModel::Model Model = getTargetMachine().getTLSModel(GV); + + if (Model == TLSModel::LocalExec) { + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TPREL16_HA); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + PPCII::MO_TPREL16_LO); + SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, + is64bit ? MVT::i64 : MVT::i32); + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); + return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); + } + + if (!is64bit) + llvm_unreachable("only local-exec is currently supported for ppc32"); + + if (Model == TLSModel::InitialExec) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, + PtrVT, GOTReg, TGA); + SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, + PtrVT, TGA, TPOffsetHi); + return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA); + } + + if (Model == TLSModel::GeneralDynamic) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, + GOTReg, TGA); + SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, + GOTEntryHi, TGA); + + // We need a chain node, and don't have one handy. The underlying + // call has no side effects, so using the function entry node + // suffices. 
+ SDValue Chain = DAG.getEntryNode(); + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); + SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); + SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, + PtrVT, ParmReg, TGA); + // The return value from GET_TLS_ADDR really is in X3 already, but + // some hacks are needed here to tie everything together. The extra + // copies dissolve during subsequent transforms. + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); + return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT); + } - SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TPREL16_HA); - SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - PPCII::MO_TPREL16_LO); + if (Model == TLSModel::LocalDynamic) { + SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); + SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); + SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, + GOTReg, TGA); + SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, + GOTEntryHi, TGA); + + // We need a chain node, and don't have one handy. The underlying + // call has no side effects, so using the function entry node + // suffices. + SDValue Chain = DAG.getEntryNode(); + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); + SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); + SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, + PtrVT, ParmReg, TGA); + // The return value from GET_TLSLD_ADDR really is in X3 already, but + // some hacks are needed here to tie everything together. The extra + // copies dissolve during subsequent transforms. + Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); + SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, + Chain, ParmReg, TGA); + return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); + } - if (model != TLSModel::LocalExec) - llvm_unreachable("only local-exec TLS mode supported"); - SDValue TLSReg = DAG.getRegister(is64bit ? 
PPC::X13 : PPC::R2, - is64bit ? MVT::i64 : MVT::i32); - SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); - return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); + llvm_unreachable("Unknown TLS model!"); } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, @@ -1654,18 +1840,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { return true; } -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { static const uint16_t ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, @@ -1688,11 +1874,11 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, - CCState &State) { +static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { static const uint16_t ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 @@ -1815,7 +2001,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Reserve space for the linkage area on the stack. 
CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); - CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4); + CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1876,7 +2062,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); - CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal); + CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); @@ -2068,13 +2254,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { + unsigned CurArgIdx = 0; + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; + std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); + CurArgIdx = Ins[ArgNo].OrigArgIndex; unsigned CurArgOffset = ArgOffset; @@ -2409,6 +2598,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; + // FIXME: FuncArg and Ins[ArgNo] must reference the same argument. + // When passing anonymous aggregates, this is currently not true. + // See LowerFormalArguments_64SVR4 for a fix. 
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) { SDValue ArgVal; @@ -2995,7 +3187,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. - unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; + unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { @@ -3128,8 +3320,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, NodeTys.push_back(MVT::Other); NodeTys.push_back(MVT::Glue); Ops.push_back(Chain); - CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin; + CallOpc = PPCISD::BCTRL; Callee.setNode(0); + // Add use of X11 (holding environment pointer) + if (isSVR4ABI && isPPC64) + Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); @@ -3231,7 +3426,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in - // PPCRegisterInfo::eliminateCallFramePseudoInstr. + // PPCFrameLowering::eliminateCallFramePseudoInstr. int BytesCalleePops = (CallConv == CallingConv::Fast && getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; @@ -3247,17 +3442,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // Emit tail call. if (isTailCall) { - // If this is the first return lowered for this function, add the regs - // to the liveout set for the function. 
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_PPC); - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - assert(((Callee.getOpcode() == ISD::Register && cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || Callee.getOpcode() == ISD::TargetExternalSymbol || @@ -3279,7 +3463,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool needsTOCRestore = false; if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { - if (CallOpc == PPCISD::BCTRL_SVR4) { + if (CallOpc == PPCISD::BCTRL) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. // See PrepareCall() for more information about calls through function @@ -3290,9 +3474,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. needsTOCRestore = true; - } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) { + } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) { // Otherwise insert NOP for non-local calls. 
- CallOpc = PPCISD::CALL_NOP_SVR4; + CallOpc = PPCISD::CALL_NOP; } } @@ -3401,11 +3585,11 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, bool Result; if (Outs[i].IsFixed) { - Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, - CCInfo); + Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, + CCInfo); } else { - Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, - ArgFlags, CCInfo); + Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo); } if (Result) { @@ -3418,7 +3602,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, } } else { // All arguments are treated the same. - CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4); + CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); } // Assign locations to all of the outgoing aggregate by value arguments. @@ -3429,7 +3613,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); - CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal); + CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); // Size of the linkage area, parameter list area and the part of the local // space variable where copies of aggregates which are passed by value are @@ -4323,14 +4507,8 @@ PPCTargetLowering::LowerReturn(SDValue Chain, getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_PPC); - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps(1, Chain); // Copy the result values into the output registers. 
for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -4355,12 +4533,17 @@ PPCTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. if (Flag.getNode()) - return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - else - return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain); + RetOps.push_back(Flag); + + return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, + &RetOps[0], RetOps.size()); } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, @@ -4466,6 +4649,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); } +SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, + DAG.getVTList(MVT::i32, MVT::Other), + Op.getOperand(0), Op.getOperand(1)); +} + +SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -4553,37 +4751,72 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : - PPCISD::FCTIDZ, + (PPCSubTarget.hasFPCVT() ? 
PPCISD::FCTIWUZ : + PPCISD::FCTIDZ), dl, MVT::f64, Src); break; case MVT::i64: - Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src); + assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) && + "i64 FP_TO_UINT is supported only with FPCVT"); + Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : + PPCISD::FCTIDUZ, + dl, MVT::f64, Src); break; } // Convert the FP value to an int value through memory. - SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); + bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() && + (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()); + SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); + int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); + MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, - MachinePointerInfo(), false, false, 0); + SDValue Chain; + if (i32Stack) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); + SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; + Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, + DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), + MVT::i32, MMO); + } else + Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, + MPI, false, false, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. 
- if (Op.getValueType() == MVT::i32) + if (Op.getValueType() == MVT::i32 && !i32Stack) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(), + MPI = MachinePointerInfo(); + } + + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI, false, false, false, 0); } -SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, +SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); + assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && + "UINT_TO_FP is supported only with FPCVT"); + + // If we have FCFIDS, then use it when converting to single-precision. + // Otherwise, convert to double-precision and then round. + unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + (Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::FCFIDUS : PPCISD::FCFIDS) : + (Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::FCFIDU : PPCISD::FCFID); + MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + MVT::f32 : MVT::f64; + if (Op.getOperand(0).getValueType() == MVT::i64) { SDValue SINT = Op.getOperand(0); // When converting to single-precision, we actually need to convert @@ -4597,6 +4830,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, // However, if -enable-unsafe-fp-math is in effect, accept double // rounding to avoid the extra overhead. if (Op.getValueType() == MVT::f32 && + !PPCSubTarget.hasFPCVT() && !DAG.getTarget().Options.UnsafeFPMath) { // Twiddle input to make sure the low 11 bits are zero. 
(If this @@ -4630,44 +4864,69 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); } + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); - SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); - if (Op.getValueType() == MVT::f32) + SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); + + if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } assert(Op.getOperand(0).getValueType() == MVT::i32 && - "Unhandled SINT_TO_FP type in custom expander!"); + "Unhandled INT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of // 64-bit registers. In particular, sign extend the input value into the // 64-bit register with extsw, store the WHOLE 64-bit value into the stack // then lfd it and fcfid it. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32, + SDValue Ld; + if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) { + int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, 0); + + assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && + "Expected an i32 store"); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, 4, 4); + SDValue Ops[] = { Store, FIdx }; + Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 
+ PPCISD::LFIWZX : PPCISD::LFIWAX, + dl, DAG.getVTList(MVT::f64, MVT::Other), + Ops, 2, MVT::i32, MMO); + } else { + assert(PPCSubTarget.isPPC64() && + "i32->FP without LFIWAX supported only on PPC64"); + + int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Op.getOperand(0)); - // STD the extended value into the stack slot. - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, 8, 8); - SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx }; - SDValue Store = - DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), - Ops, 4, MVT::i64, MMO); - // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(), - false, false, false, 0); + // STD the extended value into the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, 0); + + // Load the value as a double. + Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, false, 0); + } // FCFID it and return it. 
- SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); - if (Op.getValueType() == MVT::f32) + SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); + if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } @@ -4697,12 +4956,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - std::vector<EVT> NodeTys; SDValue MFFSreg, InFlag; // Save FP Control Word to register - NodeTys.push_back(MVT::f64); // return register - NodeTys.push_back(MVT::Glue); // unused in this context + EVT NodeTys[] = { + MVT::f64, // return register + MVT::Glue // unused in this context + }; SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); // Save FP register to stack slot @@ -4936,11 +5196,21 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // Two instruction sequences. // If this value is in the range [-32,30] and is even, use: - // tmp = VSPLTI[bhw], result = add tmp, tmp - if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { - SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl); - Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) + // If this value is in the range [17,31] and is odd, use: + // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) + // If this value is in the range [-31,-17] and is odd, use: + // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) + // Note the last two are three-instruction sequences. + if (SextVal >= -32 && SextVal <= 31) { + // To avoid having these optimizations undone by constant folding, + // we convert to a pseudo that will be expanded later into one of + // the above forms. 
+ SDValue Elt = DAG.getConstant(SextVal, MVT::i32); + EVT VT = Op.getValueType(); + int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); + SDValue EltSize = DAG.getConstant(Size, MVT::i32); + return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is @@ -5036,23 +5306,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } } - // Three instruction sequences. - - // Odd, in range [17,31]: (vsplti C)-(vsplti -16). - if (SextVal >= 0 && SextVal <= 31) { - SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl); - SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); - LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); - } - // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). - if (SextVal >= -31 && SextVal <= 0) { - SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl); - SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl); - LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), LHS); - } - return SDValue(); } @@ -5326,9 +5579,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(3), // RHS DAG.getConstant(CompareOpc, MVT::i32) }; - std::vector<EVT> VTs; - VTs.push_back(Op.getOperand(2).getValueType()); - VTs.push_back(MVT::Glue); + EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); // Now that we have the comparison, emit a copy from the CR to a GPR. 
@@ -5470,11 +5721,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); + case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, Op.getDebugLoc()); - case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); // Lower 64-bit shifts. @@ -5528,50 +5783,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(1)); - // This sequence changes FPSCR to do round-to-zero, adds the two halves - // of the long double, and puts FPSCR back the way it was. We do not - // actually model FPSCR. - std::vector<EVT> NodeTys; - SDValue Ops[4], Result, MFFSreg, InFlag, FPreg; - - NodeTys.push_back(MVT::f64); // Return register - NodeTys.push_back(MVT::Glue); // Returns a flag for later insns - Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); - MFFSreg = Result.getValue(0); - InFlag = Result.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = DAG.getConstant(31, MVT::i32); - Ops[1] = InFlag; - Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2); - InFlag = Result.getValue(0); - - NodeTys.clear(); - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = DAG.getConstant(30, MVT::i32); - Ops[1] = InFlag; - Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2); - InFlag = Result.getValue(0); - - NodeTys.clear(); - NodeTys.push_back(MVT::f64); // result of add - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = Lo; - Ops[1] = Hi; - Ops[2] = InFlag; - Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3); - 
FPreg = Result.getValue(0); - InFlag = Result.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::f64); - Ops[0] = DAG.getConstant(1, MVT::i32); - Ops[1] = MFFSreg; - Ops[2] = FPreg; - Ops[3] = InFlag; - Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4); - FPreg = Result.getValue(0); + // Add the two halves of the long double in round-to-zero mode. + SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); // We know the low half is about to be thrown away, so just use something // convenient. @@ -5663,7 +5876,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = PPCSubTarget.isPPC64(); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); @@ -5767,7 +5980,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, .addReg(TmpReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); - BuildMI(BB, dl, TII->get(is64bit ? 
PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(PPC::STWCX)) .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); @@ -5782,9 +5995,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, return BB; } +llvm::MachineBasicBlock* +PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(RC->hasType(MVT::i32) && "Invalid destination!"); + unsigned mainDstReg = MRI.createVirtualRegister(RC); + unsigned restoreDstReg = MRI.createVirtualRegister(RC); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + // For v = setjmp(buf), we generate + // + // thisMBB: + // SjLjSetup mainMBB + // bl mainMBB + // v_restore = 1 + // b sinkMBB + // + // mainMBB: + // buf[LabelOffset] = LR + // v_main = 0 + // + // sinkMBB: + // v = phi(main, restore) + // + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its successor edges to sinkMBB. 
+ sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // Note that the structure of the jmp_buf used here is not compatible + // with that used by libc, and is not designed to be. Specifically, it + // stores only those 'reserved' registers that LLVM does not otherwise + // understand how to spill. Also, by convention, by the time this + // intrinsic is called, Clang has already stored the frame address in the + // first slot of the buffer and stack address in the third. Following the + // X86 target code, we'll store the jump address in the second slot. We also + // need to save the TOC pointer (R2) to handle jumps between shared + // libraries, and that will be stored in the fourth slot. The thread + // identifier (R13) is not affected. + + // thisMBB: + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + // Prepare IP either in reg. + const TargetRegisterClass *PtrRC = getRegClassFor(PVT); + unsigned LabelReg = MRI.createVirtualRegister(PtrRC); + unsigned BufReg = MI->getOperand(1).getReg(); + + if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) + .addReg(PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Setup + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); + MIB.addRegMask(PPCRegInfo->getNoPreservedMask()); + + BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); + + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) + .addMBB(mainMBB); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); + + thisMBB->addSuccessor(mainMBB, /* weight */ 0); + thisMBB->addSuccessor(sinkMBB, /* weight */ 1); + + // mainMBB: + // mainDstReg = 0 + MIB = BuildMI(mainMBB, DL, + TII->get(PPCSubTarget.isPPC64() ? 
PPC::MFLR8 : PPC::MFLR), LabelReg); + + // Store IP + if (PPCSubTarget.isPPC64()) { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) + .addReg(LabelReg) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) + .addReg(LabelReg) + .addImm(LabelOffset) + .addReg(BufReg); + } + + MIB.setMemRefs(MMOBegin, MMOEnd); + + BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); + mainMBB->addSuccessor(sinkMBB); + + // sinkMBB: + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(PPC::PHI), DstReg) + .addReg(mainDstReg).addMBB(mainMBB) + .addReg(restoreDstReg).addMBB(thisMBB); + + MI->eraseFromParent(); + return sinkMBB; +} + +MachineBasicBlock * +PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + + const TargetRegisterClass *RC = + (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned Tmp = MRI.createVirtualRegister(RC); + // Since FP is only updated here but NOT referenced, it's treated as GPR. + unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; + unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; + + MachineInstrBuilder MIB; + + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t SPOffset = 2 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + unsigned BufReg = MI->getOperand(0).getReg(); + + // Reload FP (the jumped-to function may not have had a + // frame pointer, and if so, then its r31 will be restored + // as necessary). 
+ if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) + .addImm(0) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) + .addImm(0) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload IP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) + .addImm(LabelOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload SP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) + .addImm(SPOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) + .addImm(SPOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // FIXME: When we also support base pointers, that register must also be + // restored here. + + // Reload TOC + if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Jump + BuildMI(*MBB, MI, DL, + TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); + BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? 
PPC::BCTR8 : PPC::BCTR)); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || + MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { + return emitEHSjLjSetJmp(MI, BB); + } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || + MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { + return emitEHSjLjLongJmp(MI, BB); + } + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -5802,24 +6244,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned SelectPred = MI->getOperand(4).getImm(); DebugLoc dl = MI->getDebugLoc(); - // The SelectPred is ((BI << 5) | BO) for a BCC - unsigned BO = SelectPred & 0xF; - assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel"); - - unsigned TrueOpNo, FalseOpNo; - if (BO == 12) { - TrueOpNo = 2; - FalseOpNo = 3; - } else { - TrueOpNo = 3; - FalseOpNo = 2; - SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred); + unsigned SubIdx; + bool SwapOps; + switch (SelectPred) { + default: llvm_unreachable("invalid predicate for isel"); + case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break; + case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break; + case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break; + case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break; + case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break; + case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; + case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; + case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; } BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(TrueOpNo).getReg()) - .addReg(MI->getOperand(FalseOpNo).getReg()) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()); + 
.addReg(MI->getOperand(SwapOps? 3 : 2).getReg()) + .addReg(MI->getOperand(SwapOps? 2 : 3).getReg()) + .addReg(MI->getOperand(1).getReg(), 0, SubIdx); } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || @@ -6052,7 +6494,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB @@ -6155,6 +6597,75 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) .addReg(ShiftReg); + } else if (MI->getOpcode() == PPC::FADDrtz) { + // This pseudo performs an FADD with rounding mode temporarily forced + // to round-to-zero. We emit this via custom inserter since the FPSCR + // is not modeled at the SelectionDAG level. + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src1 = MI->getOperand(1).getReg(); + unsigned Src2 = MI->getOperand(2).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + + // Save FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); + + // Set rounding mode to round-to-zero. + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); + + // Perform addition. + BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); + + // Restore FPSCR value. 
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); + } else if (MI->getOpcode() == PPC::FRINDrint || + MI->getOpcode() == PPC::FRINSrint) { + bool isf32 = MI->getOpcode() == PPC::FRINSrint; + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + + // Perform the rounding. + BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest) + .addReg(Src); + + // Compare the results. + BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg) + .addReg(Dest).addReg(Src); + + // If the results were not equal, then set the FPSCR XX bit. + MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, midMBB); + F->insert(It, exitMBB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + BuildMI(*BB, MI, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB); + + BB->addSuccessor(midMBB); + BB->addSuccessor(exitMBB); + + BB = midMBB; + + // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set + // the FI bit here because that will not automatically set XX also, + // and XX is what libm interprets as the FE_INEXACT flag. 
+ BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6); + BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); + + BB->addSuccessor(exitMBB); + + BB = exitMBB; } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -6167,6 +6678,139 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Target Optimization Hooks //===----------------------------------------------------------------------===// +SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, + DAGCombinerInfo &DCI) const { + if (DCI.isAfterLegalizeVectorOps()) + return SDValue(); + + EVT VT = Op.getValueType(); + + if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) || + (VT == MVT::f64 && PPCSubTarget.hasFRE()) || + (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { + + // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) + // For the reciprocal, we need to find the zero of the function: + // F(X) = A X - 1 [which has a zero at X = 1/A] + // => + // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form + // does not require additional intermediate precision] + + // Convergence is quadratic, so we essentially double the number of digits + // correct after every iteration. The minimum architected relative + // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has + // 23 digits and double has 52 digits. + int Iterations = PPCSubTarget.hasRecipPrec() ? 
1 : 3; + if (VT.getScalarType() == MVT::f64) + ++Iterations; + + SelectionDAG &DAG = DCI.DAG; + DebugLoc dl = Op.getDebugLoc(); + + SDValue FPOne = + DAG.getConstantFP(1.0, VT.getScalarType()); + if (VT.isVector()) { + assert(VT.getVectorNumElements() == 4 && + "Unknown vector type"); + FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, + FPOne, FPOne, FPOne, FPOne); + } + + SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op); + DCI.AddToWorklist(Est.getNode()); + + // Newton iterations: Est = Est + Est (1 - Arg * Est) + for (int i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est); + DCI.AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst); + DCI.AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); + DCI.AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst); + DCI.AddToWorklist(Est.getNode()); + } + + return Est; + } + + return SDValue(); +} + +SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, + DAGCombinerInfo &DCI) const { + if (DCI.isAfterLegalizeVectorOps()) + return SDValue(); + + EVT VT = Op.getValueType(); + + if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) || + (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) || + (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { + + // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) + // For the reciprocal sqrt, we need to find the zero of the function: + // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] + // => + // X_{i+1} = X_i (1.5 - A X_i^2 / 2) + // As a result, we precompute A/2 prior to the iteration loop. + + // Convergence is quadratic, so we essentially double the number of digits + // correct after every iteration. The minimum architected relative + // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has + // 23 digits and double has 52 digits. + int Iterations = PPCSubTarget.hasRecipPrec() ? 
1 : 3; + if (VT.getScalarType() == MVT::f64) + ++Iterations; + + SelectionDAG &DAG = DCI.DAG; + DebugLoc dl = Op.getDebugLoc(); + + SDValue FPThreeHalves = + DAG.getConstantFP(1.5, VT.getScalarType()); + if (VT.isVector()) { + assert(VT.getVectorNumElements() == 4 && + "Unknown vector type"); + FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, + FPThreeHalves, FPThreeHalves, + FPThreeHalves, FPThreeHalves); + } + + SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op); + DCI.AddToWorklist(Est.getNode()); + + // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that + // this entire sequence requires only one FP constant. + SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op); + DCI.AddToWorklist(HalfArg.getNode()); + + HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op); + DCI.AddToWorklist(HalfArg.getNode()); + + // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) + for (int i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est); + DCI.AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst); + DCI.AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst); + DCI.AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); + DCI.AddToWorklist(Est.getNode()); + } + + return Est; + } + + return SDValue(); +} + SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { const TargetMachine &TM = getTargetMachine(); @@ -6193,7 +6837,72 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return N->getOperand(0); } break; + case ISD::FDIV: { + assert(TM.Options.UnsafeFPMath && + "Reciprocal estimates require UnsafeFPMath"); + + if (N->getOperand(1).getOpcode() == ISD::FSQRT) { + SDValue RV = + DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI); + if (RV.getNode() != 0) { + DCI.AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, dl, 
N->getValueType(0), + N->getOperand(0), RV); + } + } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND && + N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) { + SDValue RV = + DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), + DCI); + if (RV.getNode() != 0) { + DCI.AddToWorklist(RV.getNode()); + RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(), + N->getValueType(0), RV); + DCI.AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), + N->getOperand(0), RV); + } + } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND && + N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) { + SDValue RV = + DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), + DCI); + if (RV.getNode() != 0) { + DCI.AddToWorklist(RV.getNode()); + RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(), + N->getValueType(0), RV, + N->getOperand(1).getOperand(1)); + DCI.AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), + N->getOperand(0), RV); + } + } + + SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI); + if (RV.getNode() != 0) { + DCI.AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), + N->getOperand(0), RV); + } + + } + break; + case ISD::FSQRT: { + assert(TM.Options.UnsafeFPMath && + "Reciprocal estimates require UnsafeFPMath"); + + // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the + // reciprocal sqrt. 
+ SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI); + if (RV.getNode() != 0) { + DCI.AddToWorklist(RV.getNode()); + RV = DAGCombineFastRecip(RV, DCI); + if (RV.getNode() != 0) + return RV; + } + } + break; case ISD::SINT_TO_FP: if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { @@ -6240,8 +6949,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); - Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val, - N->getOperand(2), N->getOperand(3)); + SDValue Ops[] = { + N->getOperand(0), Val, N->getOperand(2), + DAG.getValueType(N->getOperand(1).getValueType()) + }; + + Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, + DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), + cast<StoreSDNode>(N)->getMemoryVT(), + cast<StoreSDNode>(N)->getMemOperand()); DCI.AddToWorklist(Val.getNode()); return Val; } @@ -6251,7 +6967,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(1).getOpcode() == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && (N->getOperand(1).getValueType() == MVT::i32 || - N->getOperand(1).getValueType() == MVT::i16)) { + N->getOperand(1).getValueType() == MVT::i16 || + (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && + TM.getSubtarget<PPCSubtarget>().isPPC64() && + N->getOperand(1).getValueType() == MVT::i64))) { SDValue BSwapOp = N->getOperand(1).getOperand(0); // Do an any-extend to 32-bits if this is a half-word input. if (BSwapOp.getValueType() == MVT::i16) @@ -6272,7 +6991,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && - (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) { + (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || + (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && + TM.getSubtarget<PPCSubtarget>().isPPC64() && + N->getValueType(0) == MVT::i64))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast<LoadSDNode>(Load); // Create the byte-swapping load. @@ -6283,8 +7005,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, }; SDValue BSLoad = DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, - DAG.getVTList(MVT::i32, MVT::Other), Ops, 3, - LD->getMemoryVT(), LD->getMemOperand()); + DAG.getVTList(N->getValueType(0) == MVT::i64 ? + MVT::i64 : MVT::i32, MVT::Other), + Ops, 3, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; @@ -6384,14 +7107,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); // Create the PPCISD altivec 'dot' comparison node. - std::vector<EVT> VTs; SDValue Ops[] = { LHS.getOperand(2), // LHS of compare LHS.getOperand(3), // RHS of compare DAG.getConstant(CompareOpc, MVT::i32) }; - VTs.push_back(LHS.getOperand(2).getValueType()); - VTs.push_back(MVT::Glue); + EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); // Unpack the result based on how the target uses it. 
@@ -6543,6 +7264,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 + if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); + return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); @@ -6727,13 +7451,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); - bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects()) && - MFI->getStackSize() && - !MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::Naked); - unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : - (is31 ? PPC::R31 : PPC::R1); + + // Naked functions never have a frame pointer, and so we use r1. For all + // other functions, this decision must be delayed until during PEI. + unsigned FrameReg; + if (MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) + FrameReg = isPPC64 ? PPC::X1 : PPC::R1; + else + FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); while (Depth--) @@ -6754,16 +7481,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, -/// probably because the source does not need to be loaded. If -/// 'IsZeroVal' is true, that means it's safe to return a -/// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. 
'MemcpyStrSrc' indicates whether the memcpy source is -/// constant so it does not need to be loaded. +/// probably because the source does not need to be loaded. If 'IsMemset' is +/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that +/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy +/// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { @@ -6773,6 +7499,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + bool *Fast) const { + if (DisablePPCUnaligned) + return false; + + // PowerPC supports unaligned memory access for simple non-vector types. + // Although accessing unaligned addresses is not as efficient as accessing + // aligned addresses, it is generally more efficient than manual expansion, + // and generally only traps for software emulation when crossing page + // boundaries. + + if (!VT.isSimple()) + return false; + + if (VT.getSimpleVT().isVector()) + return false; + + if (VT == MVT::ppcf128) + return false; + + if (Fast) + *Fast = true; + + return true; +} + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. 
fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index b3c7f9c..7157b70 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -16,9 +16,10 @@ #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #include "PPC.h" +#include "PPCRegisterInfo.h" #include "PPCSubtarget.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" namespace llvm { namespace PPCISD { @@ -35,14 +36,21 @@ namespace llvm { /// was temporarily in the f64 operand. FCFID, + /// Newer FCFID[US] integer-to-floating-point conversion instructions for + /// unsigned integers and single-precision outputs. + FCFIDU, FCFIDS, FCFIDUS, + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 /// operand, producing an f64 value containing the integer representation /// of that FP value. FCTIDZ, FCTIWZ, - /// STFIWX - The STFIWX instruction. The first operand is an input token - /// chain, then an f64 value to store, then an address to store it to. - STFIWX, + /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for + /// unsigned integers. + FCTIDUZ, FCTIWUZ, + + /// Reciprocal estimate instructions (unary FP ops). + FRE, FRSQRTE, // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking // three v4f32 operands and producing a v4f32 result. @@ -90,17 +98,10 @@ namespace llvm { /// code. SRL, SRA, SHL, - /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit" - /// registers. - EXTSW_32, - /// CALL - A direct function call. - /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit + /// CALL_NOP is a call with the special NOP which follows 64-bit /// SVR4 calls. 
- CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4, - - /// NOP - Special NOP which follows 64-bit SVR4 calls. - NOP, + CALL, CALL_NOP, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. @@ -108,7 +109,7 @@ namespace llvm { /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a /// BCTRL instruction. - BCTRL_Darwin, BCTRL_SVR4, + BCTRL, /// Return with a flag operand, matched by 'blr' RET_FLAG, @@ -119,6 +120,12 @@ namespace llvm { /// are undefined. MFCR, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* /// instructions. For lack of better number, we use the opcode number /// encoding for the OPC field to identify the compare. For example, 838 @@ -138,26 +145,13 @@ namespace llvm { /// an optional input flag argument. COND_BRANCH, - // The following 5 instructions are used only as part of the - // long double-to-int conversion sequence. - - /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the - /// register. - MFFS, - - /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR. - MTFSB0, - - /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR. - MTFSB1, - - /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with - /// rounding towards zero. It has flags added so it won't move past the - /// FPSCR-setting instructions. + /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding + /// towards zero. Used only as part of the long double-to-int + /// conversion sequence. FADDRTZ, - /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR. - MTFSF, + /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. + MFFS, /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and /// reserve indexed. This is used to implement atomic operations. 
@@ -178,20 +172,111 @@ namespace llvm { CR6SET, CR6UNSET, - /// STD_32 - This is the STD instruction for use with "32-bit" registers. - STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, + /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec + /// TLS model, produces an ADDIS8 instruction that adds the GOT + /// base to sym@got@tprel@ha. + ADDIS_GOT_TPREL_HA, + + /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec + /// TLS model, produces a LD instruction with base register G8RReg + /// and offset sym@got@tprel@l. This completes the addition that + /// finds the offset of "sym" relative to the thread pointer. + LD_GOT_TPREL_L, + + /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS + /// model, produces an ADD instruction that adds the contents of + /// G8RReg to the thread pointer. Symbol contains a relocation + /// sym@tls which is to be replaced by the thread pointer and + /// identifies to the linker that the instruction is part of a + /// TLS sequence. + ADD_TLS, + + /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS + /// model, produces an ADDIS8 instruction that adds the GOT base + /// register to sym@got@tlsgd@ha. + ADDIS_TLSGD_HA, + + /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@tlsgd@l. + ADDI_TLSGD_L, + + /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS + /// model, produces a call to __tls_get_addr(sym@tlsgd). + GET_TLS_ADDR, + + /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS + /// model, produces an ADDIS8 instruction that adds the GOT base + /// register to sym@got@tlsld@ha. + ADDIS_TLSLD_HA, + + /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@tlsld@l. 
+ ADDI_TLSLD_L, + + /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS + /// model, produces a call to __tls_get_addr(sym@tlsld). + GET_TLSLD_ADDR, + + /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the + /// local-dynamic TLS model, produces an ADDIS8 instruction + /// that adds X3 to sym@dtprel@ha. The Chain operand is needed + /// to tie this in place following a copy to %X3 from the result + /// of a GET_TLSLD_ADDR. + ADDIS_DTPREL_HA, + + /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym@got@dtprel@l. + ADDI_DTPREL_L, + + /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded + /// during instruction selection to optimize a BUILD_VECTOR into + /// operations on splats. This is necessary to avoid losing these + /// optimizations due to constant folding. + VADD_SPLAT, /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or /// i32. - STBRX, + STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE, /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 /// or i32. - LBRX + LBRX, + + /// STFIWX - The STFIWX instruction. The first operand is an input token + /// chain, then an f64 value to store, then an address to store it to. + STFIWX, + + /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point + /// load which sign-extends from a 32-bit integer value into the + /// destination 64-bit register. + LFIWAX, + + /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point + /// load which zero-extends from a 32-bit integer value into the + /// destination 64-bit register. 
+ LFIWZX, + + /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, + /// produces an ADDIS8 instruction that adds the TOC base register to + /// sym@toc@ha. + ADDIS_TOC_HA, + + /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model, + /// produces a LD instruction with base register G8RReg and offset + /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + LD_TOC_L, + + /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces + /// an ADDI8 instruction that adds G8RReg to sym@toc@l. + /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset. + ADDI_TOC_L }; } @@ -241,6 +326,7 @@ namespace llvm { class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; + const PPCRegisterInfo *PPCRegInfo; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -249,7 +335,7 @@ namespace llvm { /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// getSetCCResultType - Return the ISD::SETCC ValueType virtual EVT getSetCCResultType(EVT VT) const; @@ -315,6 +401,12 @@ namespace llvm { MachineBasicBlock *MBB, bool is8bit, unsigned Opcode) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + ConstraintType getConstraintType(const std::string &Constraint) const; /// Examine constraint string and operand type and determine a weight value. @@ -358,18 +450,21 @@ namespace llvm { /// lowering. If DstAlign is zero that means it's safe to destination /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. 
If - /// 'IsZeroVal' is true, that means it's safe to return a - /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is - /// constant so it does not need to be loaded. + /// probably because the source does not need to be loaded. If 'IsMemset' is + /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that + /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy + /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, bool MemcpyStrSrc, + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; + /// Is unaligned memory access allowed for the given type, and is it fast + /// relative to software emulation. + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. 
fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd @@ -415,7 +510,7 @@ namespace llvm { const PPCSubtarget &Subtarget) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const; - SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; @@ -525,6 +620,12 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + + SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const; + SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const; }; } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 9711452..fa5b65f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -30,8 +30,12 @@ def symbolLo64 : Operand<i64> { let EncoderMethod = "getLO16Encoding"; } def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); + let MIOperandInfo = (ops i64imm:$imm); } +def tlsreg : Operand<i64> { + let EncoderMethod = "getTLSRegEncoding"; +} +def tlsgd : Operand<i64> {} //===----------------------------------------------------------------------===// // 64-bit transformation functions. @@ -62,123 +66,112 @@ def HI48_64 : SDNodeXForm<imm, [{ // Calls. 
// +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { + let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in + def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In64BitMode]>; +} + let Defs = [LR8] in def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>, PPC970_Unit_BRU; -// Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { - // Convenient aliases for call instructions - let Uses = [RM] in { - def BL8_Darwin : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA8_Darwin : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>; - } - let Uses = [CTR8, RM] in { - def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>; +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { + let Defs = [CTR8], Uses = [CTR8] in { + def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; } } -// ELF 64 ABI Calls = Darwin ABI Calls -// Used to define BL8_ELF and BLA8_ELF let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL8_ELF : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. + def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func), + "bl $func", BrB, []>; // See Pat patterns below. 
- let isCodeGenOnly = 1 in - def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24, + def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func), + "bla $func", BrB, [(PPCcall (i64 imm:$func))]>; + } + let Uses = [RM], isCodeGenOnly = 1 in { + def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins calltarget:$func), "bl $func\n\tnop", BrB, []>; - def BLA8_ELF : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; + def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, tlsgd:$sym), + "bl $func($sym)\n\tnop", BrB, []>; - let isCodeGenOnly = 1 in - def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24, + def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, tlsgd:$sym), + "bl $func($sym)\n\tnop", BrB, []>; + + def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24, (outs), (ins aaddr:$func), "bla $func\n\tnop", BrB, - [(PPCcall_nop_SVR4 (i64 imm:$func))]>; + [(PPCcall_nop (i64 imm:$func))]>; } - let Uses = [X11, CTR8, RM] in { - def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>; + let Uses = [CTR8, RM] in { + def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), + "bctrl", BrB, [(PPCbctrl)]>, + Requires<[In64BitMode]>; } } // Calls -def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)), - (BL8_Darwin tglobaladdr:$dst)>; -def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), - (BL8_Darwin texternalsym:$dst)>; +def : Pat<(PPCcall (i64 tglobaladdr:$dst)), + (BL8 tglobaladdr:$dst)>; +def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)), + (BL8_NOP tglobaladdr:$dst)>; -def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), - (BL8_ELF tglobaladdr:$dst)>; -def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)), - (BL8_NOP_ELF tglobaladdr:$dst)>; - -def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), - (BL8_ELF texternalsym:$dst)>; -def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)), - 
(BL8_NOP_ELF texternalsym:$dst)>; - -def : Pat<(PPCnop), - (NOP)>; +def : Pat<(PPCcall (i64 texternalsym:$dst)), + (BL8 texternalsym:$dst)>; +def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), + (BL8_NOP texternalsym:$dst)>; // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64", - [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_SUB_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64", - [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_OR_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64", - [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_XOR_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64", - [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_AND_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64", - [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_NAND_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64", - [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_CMP_SWAP_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64", - [(set G8RC:$dst, - (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>; + [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, 
i64:$new))]>; def ATOMIC_SWAP_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64", - [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>; + [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>; } } // Instructions to support atomic operations def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr), "ldarx $rD, $ptr", LdStLDARX, - [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>; + [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; let Defs = [CR0] in def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst), "stdcx. $rS, $dst", LdStSTDCX, - [(PPCstcx G8RC:$rS, xoaddr:$dst)]>, + [(PPCstcx i64:$rS, xoaddr:$dst)]>, isDOT; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in @@ -197,17 +190,12 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset), "#TC_RETURNr8 $dst $offset", []>; +let isCodeGenOnly = 1 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, - isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in { - let isReturn = 1 in { - def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, - Requires<[In64BitMode]>; - } - - def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, - Requires<[In64BitMode]>; -} + isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in +def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In64BitMode]>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, @@ -223,6 +211,8 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst), "ba $dst", BrB, []>; +} + def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>; @@ -232,20 +222,13 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { - let Defs = [CTR8], Uses = [CTR8] 
in { - def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), - "bdz $dst">; - def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), - "bdnz $dst">; - } -} -// 64-but CR instructions +// 64-bit CR instructions def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +let isCodeGenOnly = 1 in def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM), "#MFCR8pseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -254,6 +237,18 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP64", + [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In64BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP64", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In64BitMode]>; +} + //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. 
@@ -262,13 +257,13 @@ def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in { +let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in { def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let Pattern = [(set G8RC:$rT, readcyclecounter)] in +let Pattern = [(set i64:$rT, readcyclecounter)] in def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), "mfspr $rT, 268", SprMFTB>, PPC970_DGroup_First, PPC970_Unit_FXU; @@ -279,8 +274,8 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), let Defs = [X1], Uses = [X1] in def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8", - [(set G8RC:$result, - (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>; + [(set i64:$result, + (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>; let Defs = [LR8] in { def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS), @@ -302,126 +297,129 @@ let PPC970_Unit = 1 in { // FXU Operations. let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), "li $rD, $imm", IntSimple, - [(set G8RC:$rD, immSExt16:$imm)]>; + [(set i64:$rD, immSExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), "lis $rD, $imm", IntSimple, - [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; + [(set i64:$rD, imm16ShiftedSExt:$imm)]>; } // Logical ops. 
def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "nand $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>; + [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "and $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>; + [(set i64:$rA, (and i64:$rS, i64:$rB))]>; def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "andc $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>; + [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "or $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>; + [(set i64:$rA, (or i64:$rS, i64:$rB))]>; def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "nor $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>; + [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "orc $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>; + [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "eqv $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>; + [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "xor $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>; + [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; // Logical ops with immediate. def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, - [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>, + [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>, isDOT; def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "andis. 
$dst, $src1, $src2", IntGeneral, - [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>, + [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>, isDOT; def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>; + [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>; def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>; + [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>; def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>; + [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>; def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>; + [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>; def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "add $rT, $rA, $rB", IntSimple, - [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (add i64:$rA, i64:$rB))]>; +// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the +// initial-exec thread-local storage model. 
+let isCodeGenOnly = 1 in +def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB), + "add $rT, $rA, $rB@tls", IntSimple, + [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; let Defs = [CARRY] in { def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "addc $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>, + [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, PPC970_DGroup_Cracked; def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "addic $rD, $rA, $imm", IntGeneral, - [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>; + [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>; } -def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), - "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm), +def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm), + [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>; +def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>; + [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "subfic $rD, $rA, $imm", IntGeneral, - [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>; + [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>; def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subfc $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>, + [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, PPC970_DGroup_Cracked; } def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subf $rT, $rA, $rB", 
IntGeneral, - [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>; + [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA), "neg $rT, $rA", IntSimple, - [(set G8RC:$rT, (ineg G8RC:$rA))]>; + [(set i64:$rT, (ineg i64:$rA))]>; let Uses = [CARRY], Defs = [CARRY] in { def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "adde $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, -1))]>; + [(set i64:$rT, (adde i64:$rA, -1))]>; def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addze $rT, $rA", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, 0))]>; + [(set i64:$rT, (adde i64:$rA, 0))]>; def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subfe $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>; + [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (sube -1, G8RC:$rA))]>; + [(set i64:$rT, (sube -1, i64:$rA))]>; def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfze $rT, $rA", IntGeneral, - [(set G8RC:$rT, (sube 0, G8RC:$rA))]>; + [(set i64:$rT, (sube 0, i64:$rA))]>; } def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "mulhd $rT, $rA, $rB", IntMulHW, - [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "mulhdu $rT, $rA, $rB", IntMulHWU, - [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; @@ -434,54 +432,60 @@ def CMPLDI : 
DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2), def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "sld $rA, $rS, $rB", IntRotateD, - [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64; + [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "srd $rA, $rS, $rB", IntRotateD, - [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64; + [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64; let Defs = [CARRY] in { def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "srad $rA, $rS, $rB", IntRotateD, - [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64; + [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; } def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS), "extsb $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>; + [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS), "extsh $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>; + [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS), "extsw $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64; -/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers. 
-def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS), - "extsw $rA, $rS", IntSimple, - [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64; + [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), "extsw $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64; + [(set i64:$rA, (sext i32:$rS))]>, isPPC64; let Defs = [CARRY] in { def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), "sradi $rA, $rS, $SH", IntRotateDI, - [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; + [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; } def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), "cntlzd $rA, $rS", IntGeneral, - [(set G8RC:$rA, (ctlz G8RC:$rS))]>; + [(set i64:$rA, (ctlz i64:$rS))]>; +def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS), + "popcntd $rA, $rS", IntGeneral, + [(set i64:$rA, (ctpop i64:$rS))]>; + +// popcntw also does a population count on the high 32 bits (storing the +// results in the high 32-bits of the output). We'll ignore that here (which is +// safe because we never separately use the high part of the 64-bit registers). 
+def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS), + "popcntw $rA, $rS", IntGeneral, + [(set i32:$rA, (ctpop i32:$rS))]>; def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "divd $rT, $rA, $rB", IntDivD, - [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "divdu $rT, $rA, $rB", IntDivD, - [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "mulld $rT, $rA, $rB", IntMulHD, - [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64; + [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; let isCommutable = 1 in { @@ -512,7 +516,7 @@ def RLWINM8 : MForm_2<21, []>; def ISEL8 : AForm_4<31, 15, - (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), + (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } // End FXU Operations. 
@@ -527,94 +531,96 @@ def ISEL8 : AForm_4<31, 15, let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, - [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>, + [(set i64:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), "lwa $rD, $src", LdStLWA, - [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64, + [(set i64:$rD, + (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, - [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>, + [(set i64:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), "lwax $rD, $src", LdStLHA, - [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, + [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; // Update forms. -let mayLoad = 1 in -def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, - ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLHAU, - []>, RegConstraint<"$rA = $ea_result">, +let mayLoad = 1 in { +def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), + (ins memri:$addr), + "lhau $rD, $addr", LdStLHAU, + []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! 
-def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwaux $rD, $addr", LdStLHAU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } +} // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), "lbz $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>; + [(set i64:$rD, (zextloadi8 iaddr:$src))]>; def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), "lhz $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>; + [(set i64:$rD, (zextloadi16 iaddr:$src))]>; def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), "lwz $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; + [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), "lbzx $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>; + [(set i64:$rD, (zextloadi8 xaddr:$src))]>; def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), "lhzx $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>; + [(set i64:$rD, (zextloadi16 xaddr:$src))]>; def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), "lwzx $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>; + [(set i64:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. 
let mayLoad = 1 in { -def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), +def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } } @@ -624,25 +630,28 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, - [(set 
G8RC:$rD, (load ixaddr:$src))]>, isPPC64; + [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; +// The following three definitions are selected for small code model only. +// Otherwise, we need to create two instructions to form a 32-bit offset, +// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "#LDtoc", - [(set G8RC:$rD, - (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; + [(set i64:$rD, + (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64; def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "#LDtocJTI", - [(set G8RC:$rD, - (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64; + [(set i64:$rD, + (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64; def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "#LDtocCPT", - [(set G8RC:$rD, - (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64; + [(set i64:$rD, + (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; -let hasSideEffects = 1 in { +let hasSideEffects = 1, isCodeGenOnly = 1 in { let RST = 2, DS = 2 in def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg), "ld 2, 8($reg)", LdStLD, - [(PPCload_toc G8RC:$reg)]>, isPPC64; + [(PPCload_toc i64:$reg)]>, isPPC64; let RST = 2, DS = 10, RA = 1 in def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), @@ -651,18 +660,21 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), } def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, - [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; - + [(set i64:$rD, (load xaddr:$src))]>, isPPC64; +def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src), + "ldbrx $rD, $src", LdStLoad, + [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; + let mayLoad = 1 in -def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), +def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), "ldu $rD, $addr", 
LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; -def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), +def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "ldux $rD, $addr", LdStLDU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -671,118 +683,168 @@ def : Pat<(PPCload ixaddr:$src), def : Pat<(PPCload xaddr:$src), (LDX xaddr:$src)>; +// Support for medium and large code model. +def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp), + "#ADDIStocHA", + [(set i64:$rD, + (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>, + isPPC64; +def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg), + "#LDtocL", + [(set i64:$rD, + (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64; +def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp), + "#ADDItocL", + [(set i64:$rD, + (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64; + +// Support for thread-local storage. 
+def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), + "#ADDISgotTprelHA", + [(set i64:$rD, + (PPCaddisGotTprelHA i64:$reg, + tglobaltlsaddr:$disp))]>, + isPPC64; +def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg), + "#LDgotTprelL", + [(set i64:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>, + isPPC64; +def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g), + (ADD8TLS $in, tglobaltlsaddr:$g)>; +def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), + "#ADDIStlsgdHA", + [(set i64:$rD, + (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), + "#ADDItlsgdL", + [(set i64:$rD, + (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), + "#GETtlsADDR", + [(set i64:$rD, + (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), + "#ADDIStlsldHA", + [(set i64:$rD, + (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), + "#ADDItlsldL", + [(set i64:$rD, + (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; +def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), + "#GETtlsldADDR", + [(set i64:$rD, + (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, + isPPC64; +def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), + "#ADDISdtprelHA", + [(set i64:$rD, + (PPCaddisDtprelHA i64:$reg, + tglobaltlsaddr:$disp))]>, + isPPC64; +def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), + "#ADDIdtprelL", + [(set i64:$rD, + (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>, + isPPC64; + let PPC970_Unit = 2 in { // Truncating stores. 
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), "stb $rS, $src", LdStStore, - [(truncstorei8 G8RC:$rS, iaddr:$src)]>; + [(truncstorei8 i64:$rS, iaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), "sth $rS, $src", LdStStore, - [(truncstorei16 G8RC:$rS, iaddr:$src)]>; + [(truncstorei16 i64:$rS, iaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), "stw $rS, $src", LdStStore, - [(truncstorei32 G8RC:$rS, iaddr:$src)]>; + [(truncstorei32 i64:$rS, iaddr:$src)]>; def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, - [(truncstorei8 G8RC:$rS, xaddr:$dst)]>, + [(truncstorei8 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), "sthx $rS, $dst", LdStStore, - [(truncstorei16 G8RC:$rS, xaddr:$dst)]>, + [(truncstorei16 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, - [(truncstorei32 G8RC:$rS, xaddr:$dst)]>, + [(truncstorei32 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; // Normal 8-byte stores. 
def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), "std $rS, $dst", LdStSTD, - [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64; + [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, - [(store G8RC:$rS, xaddr:$dst)]>, isPPC64, + [(store i64:$rS, xaddr:$dst)]>, isPPC64, + PPC970_DGroup_Cracked; +def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst), + "stdbrx $rS, $dst", LdStStore, + [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; } -let PPC970_Unit = 2 in { - -def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTDU, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, - isPPC64; - - -def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, - 
ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +// Stores with Update (pre-inc). +let PPC970_Unit = 2, mayStore = 1 in { +def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst), + "stdu $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; + +def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; - -def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; - -def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins 
G8RC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; - -def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", LdStSTDU, - [(set ptr_rc:$ea_res, - (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stdux $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked, isPPC64; - -// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. -def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), - "std $rT, $dst", LdStSTD, - [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64; -def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), - "stdx $rT, $dst", LdStSTD, - [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64, - PPC970_DGroup_Cracked; } +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. 
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STDU $rS, iaddroff:$ptroff, $ptrreg)>; + +def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STBUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STHUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STWUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STDUX $rS, $ptrreg, $ptroff)>; //===----------------------------------------------------------------------===// @@ -793,10 +855,26 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations. 
def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB), "fcfid $frD, $frB", FPGeneral, - [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64; + [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), "fctidz $frD, $frB", FPGeneral, - [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64; + [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; + +def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB), + "fcfidu $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; +def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB), + "fcfids $frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; +def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB), + "fcfidus $frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; +def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB), + "fctiduz $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; +def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB), + "fctiwuz $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; } @@ -805,13 +883,13 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), // // Extensions and truncates to/from 32-bit regs. -def : Pat<(i64 (zext GPRC:$in)), - (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32), +def : Pat<(i64 (zext i32:$in)), + (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32), 0, 32)>; -def : Pat<(i64 (anyext GPRC:$in)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>; -def : Pat<(i32 (trunc G8RC:$in)), - (EXTRACT_SUBREG G8RC:$in, sub_32)>; +def : Pat<(i64 (anyext i32:$in)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>; +def : Pat<(i32 (trunc i64:$in)), + (EXTRACT_SUBREG $in, sub_32)>; // Extending loads with i64 targets. def : Pat<(zextloadi1 iaddr:$src), @@ -838,24 +916,24 @@ def : Pat<(extloadi32 xaddr:$src), // Standard shifts. 
These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 6-bit and 7-bit shift // amounts. -def : Pat<(sra G8RC:$rS, GPRC:$rB), - (SRAD G8RC:$rS, GPRC:$rB)>; -def : Pat<(srl G8RC:$rS, GPRC:$rB), - (SRD G8RC:$rS, GPRC:$rB)>; -def : Pat<(shl G8RC:$rS, GPRC:$rB), - (SLD G8RC:$rS, GPRC:$rB)>; +def : Pat<(sra i64:$rS, i32:$rB), + (SRAD $rS, $rB)>; +def : Pat<(srl i64:$rS, i32:$rB), + (SRD $rS, $rB)>; +def : Pat<(shl i64:$rS, i32:$rB), + (SLD $rS, $rB)>; // SHL/SRL -def : Pat<(shl G8RC:$in, (i32 imm:$imm)), - (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>; -def : Pat<(srl G8RC:$in, (i32 imm:$imm)), - (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>; +def : Pat<(shl i64:$in, (i32 imm:$imm)), + (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>; +def : Pat<(srl i64:$in, (i32 imm:$imm)), + (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>; // ROTL -def : Pat<(rotl G8RC:$in, GPRC:$sh), - (RLDCL G8RC:$in, GPRC:$sh, 0)>; -def : Pat<(rotl G8RC:$in, (i32 imm:$imm)), - (RLDICL G8RC:$in, imm:$imm, 0)>; +def : Pat<(rotl i64:$in, i32:$sh), + (RLDCL $in, $sh, 0)>; +def : Pat<(rotl i64:$in, (i32 imm:$imm)), + (RLDICL $in, imm:$imm, 0)>; // Hi and Lo for Darwin Global Addresses. 
def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>; @@ -866,15 +944,25 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>; def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>; def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>; def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>; -def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in), - (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>; -def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in), - (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>; -def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)), - (ADDIS8 G8RC:$in, tglobaladdr:$g)>; -def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)), - (ADDIS8 G8RC:$in, tconstpool:$g)>; -def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)), - (ADDIS8 G8RC:$in, tjumptable:$g)>; -def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)), - (ADDIS8 G8RC:$in, tblockaddress:$g)>; +def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in), + (ADDIS8 $in, tglobaltlsaddr:$g)>; +def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in), + (ADDI8 $in, tglobaltlsaddr:$g)>; +def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS8 $in, tglobaladdr:$g)>; +def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)), + (ADDIS8 $in, tconstpool:$g)>; +def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)), + (ADDIS8 $in, tjumptable:$g)>; +def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)), + (ADDIS8 $in, tblockaddress:$g)>; + +// Patterns to match r+r indexed loads and stores for +// addresses without at least 4-byte alignment. 
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), + (LWAX xoaddr:$src)>; +def : Pat<(i64 (unaligned4load xoaddr:$src)), + (LDX xoaddr:$src)>; +def : Pat<(unaligned4store i64:$rS, xoaddr:$dst), + (STDX $rS, xoaddr:$dst)>; + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index ba58c3e..a5ba4c8 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -158,34 +158,75 @@ def vecspltisw : PatLeaf<(build_vector), [{ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0; }], VSPLTISW_get_imm>; -def V_immneg0 : PatLeaf<(build_vector), [{ - return PPC::isAllNegativeZeroVector(N); -}]>; - //===----------------------------------------------------------------------===// // Helpers for defining instructions that directly correspond to intrinsics. -// VA1a_Int - A VAForm_1a intrinsic definition. -class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID> +// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type. +class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty> : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, - [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>; + [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>; -// VX1_Int - A VXForm_1 intrinsic definition. -class VX1_Int<bits<11> xo, string opc, Intrinsic IntID> +// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> + : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>; + +// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two +// input types and an output type. 
+class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType In1Ty, ValueType In2Ty> + : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + [(set OutTy:$vD, + (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>; + +// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type. +class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + !strconcat(opc, " $vD, $vA, $vB"), VecFP, + [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>; + +// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), !strconcat(opc, " $vD, $vA, $vB"), VecFP, - [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>; + [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>; -// VX2_Int - A VXForm_2 intrinsic definition. -class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> +// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two +// input types and an output type. +class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType In1Ty, ValueType In2Ty> + : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + !strconcat(opc, " $vD, $vA, $vB"), VecFP, + [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>; + +// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type. +class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID> + : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), + !strconcat(opc, " $vD, $vB"), VecFP, + [(set v4f32:$vD, (IntID v4f32:$vB))]>; + +// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the +// inputs doesn't match the type of the output. 
+class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), !strconcat(opc, " $vD, $vB"), VecFP, - [(set VRRC:$vD, (IntID VRRC:$vB))]>; + [(set OutTy:$vD, (IntID InTy:$vB))]>; //===----------------------------------------------------------------------===// // Instruction Definitions. +def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">; +let Predicates = [HasAltivec] in { + +let isCodeGenOnly = 1 in { def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), "dss $STRM", LdStLoad /*FIXME*/, []>; @@ -217,129 +258,136 @@ def DSTST64 : DSS_Form<374, (outs), def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; +} def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), "mfvscr $vD", LdStStore, - [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>; + [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), "mtvscr $vB", LdStLoad, - [(int_ppc_altivec_mtvscr VRRC:$vB)]>; + [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. 
def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), "lvebx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; + [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), "lvehx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; + [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), "lvewx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), "lvx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), "lvxl $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), "lvsl $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, + [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), "lvsr $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, + [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. 
def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), "stvebx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), "stvehx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), "stvewx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), "stvx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), "stvxl $rS, $dst", LdStStore, - [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; } let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vmaddfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>; + [(set v4f32:$vD, + (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; + +// FIXME: The fma+fneg pattern won't match because fneg is not legal. 
def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vnmsubfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC, - (fneg VRRC:$vB))))]>; + [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC, + (fneg v4f32:$vB))))]>; + +def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; +def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, + v8i16>; +def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>; -def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>; -def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>; -def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>; -def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>; -def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>; +def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm, + v4i32, v4i32, v16i8>; +def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>; // Shuffles. def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), "vsldoi $vD, $vA, $vB, $SH", VecFP, - [(set VRRC:$vD, - (vsldoi_shuffle:$SH (v16i8 VRRC:$vA), VRRC:$vB))]>; + [(set v16i8:$vD, + (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>; // VX-Form instructions. AltiVec arithmetic ops. 
def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vaddfp $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>; + [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>; def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vaddubm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>; + [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>; def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vadduhm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>; + [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>; def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vadduwm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>; + [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>; -def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>; -def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>; -def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>; -def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>; -def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>; -def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>; -def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>; +def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>; +def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>; +def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>; +def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>; +def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>; +def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>; +def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>; def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vand $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>; + [(set v4i32:$vD, (and 
v4i32:$vA, v4i32:$vB))]>; def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vandc $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (and (v4i32 VRRC:$vA), - (vnot_ppc VRRC:$vB)))]>; + [(set v4i32:$vD, (and v4i32:$vA, + (vnot_ppc v4i32:$vB)))]>; def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vcfsx $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>; def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vcfux $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>; def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vctsxs $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>; def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vctuxs $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>; // Defines with the UIM field set to 0 for floating-point // to integer (fp_to_sint/fp_to_uint) conversions and integer @@ -347,203 +395,237 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), let VA = 0 in { def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB), "vcfsx $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>; def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB), "vctuxs $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>; def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB), "vcfux $vD, $vB, 0", VecFP, - [(set VRRC:$vD, 
- (int_ppc_altivec_vcfux VRRC:$vB, 0))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfux v4i32:$vB, 0))]>; def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB), "vctsxs $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>; } -def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>; -def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>; - -def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>; -def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>; -def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>; -def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>; -def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>; -def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>; - -def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>; -def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>; -def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>; -def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>; -def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>; -def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>; -def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>; -def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>; -def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>; -def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>; -def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>; -def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>; -def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>; -def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>; +def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>; +def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>; + +def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>; +def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, 
v8i16>; +def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>; +def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>; +def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>; +def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>; + +def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>; +def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>; +def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>; +def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>; +def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>; +def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>; +def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>; +def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>; +def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>; +def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>; +def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>; +def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>; +def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>; +def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>; def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghh $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghw $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLB : 
VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglh $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglw $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>; - -def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>; -def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>; -def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>; -def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>; -def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>; -def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>; - -def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>; -def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>; -def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>; -def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>; -def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>; -def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>; -def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>; -def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>; + [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>; + +def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm, + v4i32, v16i8, v4i32>; +def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm, + v4i32, v8i16, v4i32>; +def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs, + v4i32, v8i16, v4i32>; +def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm, + v4i32, v16i8, v4i32>; +def VMSUMUHM : VA1a_Int_Ty3<38, 
"vmsumuhm", int_ppc_altivec_vmsumuhm, + v4i32, v8i16, v4i32>; +def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs, + v4i32, v8i16, v4i32>; + +def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb, + v8i16, v16i8>; +def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh, + v4i32, v8i16>; +def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub, + v8i16, v16i8>; +def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh, + v4i32, v8i16>; +def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb, + v8i16, v16i8>; +def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh, + v4i32, v8i16>; +def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub, + v8i16, v16i8>; +def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh, + v4i32, v8i16>; -def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>; -def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>; -def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>; -def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>; -def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>; -def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; +def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>; +def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>; +def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>; +def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>; +def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>; +def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; -def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>; +def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsubfp $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>; + [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>; def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, 
VRRC:$vB), "vsububm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>; + [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>; def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsubuhm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>; + [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>; def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsubuwm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>; + [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>; -def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>; -def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>; -def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>; -def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>; -def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>; -def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>; -def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>; -def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>; -def VSUM4SBS: VX1_Int<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs>; -def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>; -def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>; +def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>; +def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>; +def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>; +def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>; +def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>; +def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>; + +def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>; +def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>; + +def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", 
int_ppc_altivec_vsum4sbs, + v4i32, v16i8, v4i32>; +def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs, + v4i32, v8i16, v4i32>; +def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, + v4i32, v16i8, v4i32>; def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vnor $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA), - VRRC:$vB)))]>; + [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA, + v4i32:$vB)))]>; def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vor $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>; + [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>; def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vxor $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>; + [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>; + +def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>; +def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>; +def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>; -def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>; -def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>; -def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>; +def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >; +def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>; -def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >; -def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>; -def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>; -def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>; -def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>; +def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>; +def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>; +def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>; def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vspltb $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, - 
(vspltb_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; + [(set v16i8:$vD, + (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>; def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vsplth $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, - (vsplth_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; + [(set v16i8:$vD, + (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>; def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vspltw $vD, $vB, $UIMM", VecPerm, - [(set VRRC:$vD, - (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; + [(set v16i8:$vD, + (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>; -def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>; -def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>; -def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>; -def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>; -def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>; -def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>; -def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>; -def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>; +def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>; +def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>; + +def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>; +def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>; +def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>; +def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>; +def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>; +def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>; def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM), "vspltisb $vD, $SIMM", VecPerm, - [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>; + [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>; def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM), "vspltish $vD, $SIMM", VecPerm, - [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>; + [(set v8i16:$vD, (v8i16 
vecspltish:$SIMM))]>; def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM), "vspltisw $vD, $SIMM", VecPerm, - [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>; + [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>; // Vector Pack. -def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>; -def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>; -def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>; -def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>; -def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>; +def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx, + v8i16, v4i32>; +def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss, + v16i8, v8i16>; +def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus, + v16i8, v8i16>; +def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, + v16i8, v4i32>; +def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, + v8i16, v4i32>; def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuhum $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, - (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; -def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>; + [(set v16i8:$vD, + (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>; +def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, + v16i8, v8i16>; def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuwum $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, - (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; -def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>; + [(set v16i8:$vD, + (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>; +def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus, + v8i16, v4i32>; // Vector Unpack. 
-def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>; -def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>; -def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>; -def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>; -def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>; -def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>; +def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx, + v4i32, v8i16>; +def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb, + v8i16, v16i8>; +def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh, + v4i32, v8i16>; +def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx, + v4i32, v8i16>; +def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb, + v8i16, v16i8>; +def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh, + v4i32, v8i16>; // Altivec Comparisons. class VCMP<bits<10> xo, string asmstr, ValueType Ty> : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, - [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>; + [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>; class VCMPo<bits<10> xo, string asmstr, ValueType Ty> : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, - [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> { + [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> { let Defs = [CR6]; let RC = 1; } @@ -582,10 +664,16 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. 
$vD, $vA, $vB", v4i32>; +let isCodeGenOnly = 1 in def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins), "vxor $vD, $vD, $vD", VecFP, - [(set VRRC:$vD, (v4i32 immAllZerosV))]>; + [(set v4i32:$vD, (v4i32 immAllZerosV))]>; +let IMM=-1 in { +def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins), + "vspltisw $vD, -1", VecFP, + [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } +} // VALU Operations. //===----------------------------------------------------------------------===// // Additional Altivec Patterns @@ -596,31 +684,31 @@ def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>; def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>; // * 32-bit -def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM), - (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM), - (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM), - (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM), - (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM), + (DST 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM), + (DSTT 1, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM), + (DSTST 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM), + (DSTSTT 1, imm:$STRM, $rA, $rB)>; // * 64-bit -def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM), - (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM), - (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM), - (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM), - (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; +def : 
Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM), + (DST64 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM), + (DSTT64 1, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM), + (DSTST64 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM), + (DSTSTT64 1, imm:$STRM, $rA, $rB)>; // Loads. def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; // Stores. -def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst), - (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>; +def : Pat<(store v4i32:$rS, xoaddr:$dst), + (STVX $rS, xoaddr:$dst)>; // Bit conversions. def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>; @@ -642,82 +730,99 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>; // Shuffles. // Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) -def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef), - (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>; -def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef), - (VPKUWUM VRRC:$vA, VRRC:$vA)>; -def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef), - (VPKUHUM VRRC:$vA, VRRC:$vA)>; +def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef), + (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm $in))>; +def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef), + (VPKUWUM $vA, $vA)>; +def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef), + (VPKUHUM $vA, $vA)>; // Match vmrg*(x,x) -def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGLB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGLH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGLW VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGHB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGHH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGHW VRRC:$vA, VRRC:$vA)>; 
+def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef), + (VMRGLB $vA, $vA)>; +def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef), + (VMRGLH $vA, $vA)>; +def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef), + (VMRGLW $vA, $vA)>; +def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef), + (VMRGHB $vA, $vA)>; +def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef), + (VMRGHH $vA, $vA)>; +def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef), + (VMRGHW $vA, $vA)>; // Logical Operations -def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>; +def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>; -def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))), - (VNOR VRRC:$A, VRRC:$B)>; -def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))), - (VANDC VRRC:$A, VRRC:$B)>; +def : Pat<(vnot_ppc (or v4i32:$A, v4i32:$B)), + (VNOR $A, $B)>; +def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)), + (VANDC $A, $B)>; -def : Pat<(fmul VRRC:$vA, VRRC:$vB), - (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>; +def : Pat<(fmul v4f32:$vA, v4f32:$vB), + (VMADDFP $vA, $vB, + (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>; // Fused multiply add and multiply sub for packed float. 
These are represented // separately from the real instructions above, for operations that must have // the additional precision, such as Newton-Rhapson (used by divide, sqrt) -def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C), - (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>; -def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C), - (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>; +def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C), + (VMADDFP $A, $B, $C)>; +def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C), + (VNMSUBFP $A, $B, $C)>; + +def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C), + (VMADDFP $A, $B, $C)>; +def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C), + (VNMSUBFP $A, $B, $C)>; -def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C), - (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>; -def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C), - (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>; +def : Pat<(PPCvperm v16i8:$vA, v16i8:$vB, v16i8:$vC), + (VPERM $vA, $vB, $vC)>; -def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC), - (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>; +def : Pat<(PPCfre v4f32:$A), (VREFP $A)>; +def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>; // Vector shifts -def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), - (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>; -def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), - (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>; -def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), - (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>; - -def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), - (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>; -def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), - (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>; -def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), - (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>; - -def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), - (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>; -def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), - (v8i16 (VSRAH VRRC:$vA, 
VRRC:$vB))>; -def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), - (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>; +def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSLB $vA, $vB))>; +def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSLH $vA, $vB))>; +def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSLW $vA, $vB))>; + +def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRB $vA, $vB))>; +def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRH $vA, $vB))>; +def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRW $vA, $vB))>; + +def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRAB $vA, $vB))>; +def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRAH $vA, $vB))>; +def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRAW $vA, $vB))>; // Float to integer and integer to float conversions -def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))), - (VCTSXS_0 VRRC:$vA)>; -def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))), - (VCTUXS_0 VRRC:$vA)>; -def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))), - (VCFSX_0 VRRC:$vA)>; -def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))), - (VCFUX_0 VRRC:$vA)>; +def : Pat<(v4i32 (fp_to_sint v4f32:$vA)), + (VCTSXS_0 $vA)>; +def : Pat<(v4i32 (fp_to_uint v4f32:$vA)), + (VCTUXS_0 $vA)>; +def : Pat<(v4f32 (sint_to_fp v4i32:$vA)), + (VCFSX_0 $vA)>; +def : Pat<(v4f32 (uint_to_fp v4i32:$vA)), + (VCFUX_0 $vA)>; + +// Floating-point rounding +def : Pat<(v4f32 (ffloor v4f32:$vA)), + (VRFIM $vA)>; +def : Pat<(v4f32 (fceil v4f32:$vA)), + (VRFIP $vA)>; +def : Pat<(v4f32 (ftrunc v4f32:$vA)), + (VRFIZ $vA)>; +def : Pat<(v4f32 (fnearbyint v4f32:$vA)), + (VRFIN $vA)>; + +} // end HasAltivec + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td index c3c171c..400b7e3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, 
bit lk, dag OOL, dag IOL, let CR = 0; } +class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, BrB> { + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = bi; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -664,14 +676,13 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr, // This is probably 1.7.9, but I don't have the reference that uses this // numbering scheme... class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, - string cstr, InstrItinClass itin, list<dag>pattern> + InstrItinClass itin, list<dag>pattern> : I<opcode, OOL, IOL, asmstr, itin> { bits<8> FM; bits<5> rT; bit RC = 0; // set by isDOT let Pattern = pattern; - let Constraints = cstr; let Inst{6} = 0; let Inst{7-14} = FM; @@ -765,16 +776,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, bits<5> RT; bits<5> RA; bits<5> RB; - bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12). 
- bits<3> CR; + bits<5> COND; let Pattern = pattern; let Inst{6-10} = RT; let Inst{11-15} = RA; let Inst{16-20} = RB; - let Inst{21-23} = CR; - let Inst{24-25} = BIBO{6-5}; + let Inst{21-25} = COND; let Inst{26-30} = xo; let Inst{31} = 0; } @@ -987,6 +996,7 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr, //===----------------------------------------------------------------------===// class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern> : I<0, OOL, IOL, asmstr, NoItinerary> { + let isCodeGenOnly = 1; let PPC64 = 0; let Pattern = pattern; let Inst{31-0} = 0; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index d9d6844..69c54ed 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -12,12 +12,13 @@ //===----------------------------------------------------------------------===// #include "PPCInstrInfo.h" +#include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" +#include "PPCHazardRecognizers.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" -#include "PPCHazardRecognizers.h" -#include "MCTargetDesc/PPCPredicates.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -28,16 +29,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/STLExtras.h" #define GET_INSTRINFO_CTOR #include "PPCGenInstrInfo.inc" -namespace llvm { -extern cl::opt<bool> DisablePPC32RS; -extern cl::opt<bool> DisablePPC64RS; -} - using namespace llvm; static cl:: @@ -60,7 +55,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( return new PPCScoreboardHazardRecognizer(II, DAG); } - return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); + return 
TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); } /// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer @@ -99,12 +94,18 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { + // Note: This list must be kept consistent with LoadRegFromStackSlot. switch (MI->getOpcode()) { default: break; case PPC::LD: case PPC::LWZ: case PPC::LFS: case PPC::LFD: + case PPC::RESTORE_CR: + case PPC::LVX: + case PPC::RESTORE_VRSAVE: + // Check for the operands added by addFrameReference (the immediate is the + // offset which defaults to 0). if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && MI->getOperand(2).isFI()) { FrameIndex = MI->getOperand(2).getIndex(); @@ -117,12 +118,18 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { + // Note: This list must be kept consistent with StoreRegToStackSlot. switch (MI->getOpcode()) { default: break; case PPC::STD: case PPC::STW: case PPC::STFS: case PPC::STFD: + case PPC::SPILL_CR: + case PPC::STVX: + case PPC::SPILL_VRSAVE: + // Check for the operands added by addFrameReference (the immediate is the + // offset which defaults to 0). if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && MI->getOperand(2).isFI()) { FrameIndex = MI->getOperand(2).getIndex(); @@ -141,7 +148,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Normal instructions can be commuted the obvious way. if (MI->getOpcode() != PPC::RLWIMI) - return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstruction(MI, NewMI); // Cannot commute if it has a non-zero rotate count. 
if (MI->getOperand(3).getImm() != 0) @@ -444,40 +451,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const{ + // Note: If additional store instructions are added here, + // update isStoreToStackSlot. + DebugLoc DL; if (PPC::GPRCRegClass.hasSubClassEq(RC)) { - if (SrcReg != PPC::LR) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - } else { - // FIXME: this spills LR immediately to memory in one step. To do this, - // we use R11, which we know cannot be used in the prolog/epilog. This is - // a hack. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11)); - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(PPC::R11, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { - if (SrcReg != PPC::LR8) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - } else { - // FIXME: this spills LR immediately to memory in one step. To do this, - // we use X11, which we know cannot be used in the prolog/epilog. This is - // a hack. 
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11)); - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) - .addReg(PPC::X11, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD)) .addReg(SrcReg, @@ -489,47 +478,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - - bool is64Bit = TM.getSubtargetImpl()->isPPC64(); - // We need to store the CR in the low 4-bits of the saved value. First, - // issue a MFCR to save all of the CRBits. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - (is64Bit ? PPC::X2 : PPC::R2) : - (is64Bit ? PPC::X0 : PPC::R0); - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud : - PPC::MFCRpseud), ScratchReg) - .addReg(SrcReg, getKillRegState(isKill))); - - // If the saved register wasn't CR0, shift the bits left so that they are - // in CR0's slot. - if (SrcReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; - // rlwinm scratch, scratch, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? 
PPC::RLWINM8 : - PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(ShiftBits) - .addImm(0).addImm(31)); - } - - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ? - PPC::STW8 : PPC::STW)) - .addReg(ScratchReg, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { // FIXME: We use CRi here because there is no mtcrf on a bit. Since the // backend currently only uses CR1EQ as an individual bit, this should @@ -562,23 +515,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // STVX VAL, 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? 
PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) - .addReg(SrcReg, getKillRegState(isKill)) - .addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + assert(TM.getSubtargetImpl()->isDarwin() && + "VRSAVE only needs spill/restore on Darwin"); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + SpillsVRS = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -595,10 +547,19 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; - if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false, SpillsVRS = false; + if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, + NonRI, SpillsVRS)) FuncInfo->setSpillsCR(); - } + + if (SpillsVRS) + FuncInfo->setSpillsVRSAVE(); + + if (NonRI) + FuncInfo->setHasNonRISpills(); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); @@ -616,25 +577,17 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs)const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const{ + // Note: If additional load instructions are added here, + // update isLoadFromStackSlot. 
+ if (PPC::GPRCRegClass.hasSubClassEq(RC)) { - if (DestReg != PPC::LR) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - DestReg), FrameIdx)); - } else { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - PPC::R11), FrameIdx)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + DestReg), FrameIdx)); } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { - if (DestReg != PPC::LR8) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), - FrameIdx)); - } else { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), - PPC::X11), FrameIdx)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), + FrameIdx)); } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), FrameIdx)); @@ -642,37 +595,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, - get(PPC::RESTORE_CR), DestReg) - , FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? 
- PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - ScratchReg), FrameIdx)); - - // If the reloaded register isn't CR0, shift the bits right so that they are - // in the right CR's slot. - if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; - // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) - .addImm(31)); - } - - NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ? - PPC::MTCRF8 : PPC::MTCRF), DestReg) - .addReg(ScratchReg)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CR), DestReg), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { unsigned Reg = 0; @@ -702,21 +628,20 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // Dest = LVX 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? 
PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + assert(TM.getSubtargetImpl()->isDarwin() && + "VRSAVE only needs spill/restore on Darwin"); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_VRSAVE), + DestReg), + FrameIdx)); + SpillsVRS = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -734,10 +659,21 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false, SpillsVRS = false; + if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, + NonRI, SpillsVRS)) FuncInfo->setSpillsCR(); - } + + if (SpillsVRS) + FuncInfo->setSpillsVRSAVE(); + + if (NonRI) + FuncInfo->setHasNonRISpills(); + for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); @@ -786,8 +722,8 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::GC_LABEL: case PPC::DBG_VALUE: return 0; - case PPC::BL8_NOP_ELF: - case PPC::BLA8_NOP_ELF: + case PPC::BL8_NOP: + case PPC::BLA8_NOP: return 8; default: return 4; // PowerPC instructions are all 4 bytes diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 374213e..635e348 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool 
StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const; bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const; public: explicit PPCInstrInfo(PPCTargetMachine &TM); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 6ee045a..ab90762 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -20,6 +20,10 @@ include "PPCInstrFormats.td" def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx SDTCisVT<0, f64>, SDTCisPtrTy<1> ]>; +def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x + SDTCisVT<0, f64>, SDTCisPtrTy<1> +]>; + def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -36,10 +40,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [ ]>; def SDT_PPClbrx : SDTypeProfile<1, 2, [ - SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPCstbrx : SDTypeProfile<0, 3, [ - SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPClarx : SDTypeProfile<1, 1, [ @@ -53,32 +57,36 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; -def SDT_PPCnop : SDTypeProfile<0, 0, []>; //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. 
// -def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>; +def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>; +def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>; + +def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; +def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; +def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; +def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; +def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; +def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore]>; +def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx, + [SDNPHasChain, SDNPMayLoad]>; +def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx, + [SDNPHasChain, SDNPMayLoad]>; + +// Extract FPSCR (not modeled at the DAG level). +def PPCmffs : SDNode<"PPCISD::MFFS", + SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>; + +// Perform FADD in round-to-zero mode. +def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; -// This sequence is used for long double->int conversions. It changes the -// bits in the FPSCR which is not modelled. -def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, - [SDNPOutGlue]>; -def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPInGlue, SDNPOutGlue]>; -def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPInGlue, SDNPOutGlue]>; -def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, - [SDNPInGlue, SDNPOutGlue]>; -def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, - [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>, - SDTCisVT<3, f64>]>, - [SDNPInGlue]>; def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. 
@@ -91,6 +99,20 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>; def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; +def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>; +def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, + [SDNPMayLoad]>; +def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; +def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; +def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; +def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; +def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; +def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; +def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; +def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp, + [SDNPHasChain]>; +def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; + def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift @@ -99,10 +121,6 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; -def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>; -def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore, - [SDNPHasChain, SDNPMayStore]>; - // These are target-independent nodes, but have target-specific formats. 
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; @@ -110,16 +128,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; -def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; +def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, @@ -130,13 +144,9 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - -def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; +def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; @@ -144,6 +154,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, [SDNPHasChain, 
SDNPOptInGlue, SDNPVariadic]>; +def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -167,6 +185,12 @@ def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx, [SDNPHasChain, SDNPMayStore]>; +// Instructions to support medium and large code model +def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>; +def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>; +def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>; + + // Instructions to support dynamic alloca. def SDTDynOp : SDTypeProfile<1, 2, []>; def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; @@ -258,6 +282,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// restricted memrix (offset/4) constants are alignment sensitive. If these +// offsets are hidden behind TOC entries than the values of the lower-order +// bits cannot be checked directly. As a result, we need to also incorporate +// an alignment check into the relevant patterns. 
+ +def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4pre_store : PatFrag< + (ops node:$val, node:$base, node:$offset), + (pre_store node:$val, node:$base, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. @@ -294,9 +350,6 @@ def s16imm : Operand<i32> { def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; } -def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing. - let PrintMethod = "printS16X4ImmOperand"; -} def directbrtarget : Operand<OtherVT> { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; @@ -324,26 +377,37 @@ def crbitm: Operand<i8> { let EncoderMethod = "get_crbitm_encoding"; } // Address operands +// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). 
+def ptr_rc_nor0 : PointerLikeRegClass<1>; + +def dispRI : Operand<iPTR>; +def dispRIX : Operand<iPTR>; + def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; - let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg); + let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg); } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; - let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; } -// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg -// that doesn't matter. -def pred : PredicateOperand<OtherVT, (ops imm, CRRC), - (ops (i32 20), (i32 zero_reg))> { +// A single-register address. This is used with the SjLj +// pseudo-instructions. +def memr : Operand<iPTR> { + let MIOperandInfo = (ops ptr_rc:$ptrreg); +} + +// PowerPC Predicate operand. +def pred : Operand<OtherVT> { let PrintMethod = "printPredicateOperand"; + let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg); } // Define PowerPC specific addressing mode. @@ -352,9 +416,12 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>; def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std" +// The address in a single register. This is used with the SjLj +// pseudo-instructions. +def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>; + /// This is just the offset part of iaddr, used for preinc. 
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; -def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. @@ -381,17 +448,22 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS), let Defs = [R1], Uses = [R1] in def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC", - [(set GPRC:$result, - (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>; + [(set i32:$result, + (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>; // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. let usesCustomInserter = 1, // Expanded after instruction selection. PPC970_Single = 1 in { - def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F, + // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes + // because either operand might become the first operand in an isel, and + // that operand cannot be r0. 
+ def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, + GPRC_NOR0:$T, GPRC_NOR0:$F, i32imm:$BROPC), "#SELECT_CC_I4", []>; - def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F, + def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, + G8RC_NOX0:$T, G8RC_NOX0:$F, i32imm:$BROPC), "#SELECT_CC_I8", []>; def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F, @@ -418,10 +490,9 @@ def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F), "#RESTORE_CR", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { - let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in - def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p), - "b${p:cc}lr ${p:reg}", BrB, - [(retflag)]>; + let isReturn = 1, Uses = [LR, RM] in + def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB, + [(retflag)]>; let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>; } @@ -453,46 +524,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { } } -// Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { - // Convenient aliases for call instructions - let Uses = [RM] in { - def BL_Darwin : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA_Darwin : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>; - } - let Uses = [CTR, RM] in { - def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>; +// The unconditional BCL used by the SjLj setjmp code. +let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in { + let Defs = [LR], Uses = [RM] in { + def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst), + "bcl 20, 31, $dst">; } } -// SVR4 ABI Calls. 
let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL_SVR4 : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA_SVR4 : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, - [(PPCcall_SVR4 (i32 imm:$func))]>; + def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func), + "bl $func", BrB, []>; // See Pat patterns below. + def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func), + "bla $func", BrB, [(PPCcall (i32 imm:$func))]>; } let Uses = [CTR, RM] in { - def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>; + def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), + "bctrl", BrB, [(PPCbctrl)]>, + Requires<[In32BitMode]>; } } - let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi :Pseudo< (outs), (ins calltarget:$dst, i32imm:$offset), @@ -511,6 +565,8 @@ def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset), []>; +let isCodeGenOnly = 1 in { + let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, @@ -524,6 +580,7 @@ def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), "b $dst", BrB, []>; +} let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in @@ -531,6 +588,22 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), "ba $dst", BrB, []>; +let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP32", + [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In32BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf), + 
"#EH_SJLJ_LONGJMP32", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In32BitMode]>; +} + +let isBranch = 1, isTerminator = 1 in { + def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst), + "#EH_SjLj_Setup\t$dst", []>; +} // DCB* instructions. def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), @@ -566,93 +639,90 @@ let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8", - [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8", - [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8", - [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8", - [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8", - [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8", - [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16", - [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, 
(atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16", - [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16", - [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16", - [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16", - [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16", - [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32", - [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32", - [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32", - [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I32 : 
Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32", - [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32", - [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32", - [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_CMP_SWAP_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8", - [(set GPRC:$dst, - (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", - [(set GPRC:$dst, - (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", - [(set GPRC:$dst, - (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_SWAP_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8", - [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>; + [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16", - [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>; + [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>; def 
ATOMIC_SWAP_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32", - [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>; + [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>; } } // Instructions to support atomic operations def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src), "lwarx $rD, $src", LdStLWARX, - [(set GPRC:$rD, (PPClarx xoaddr:$src))]>; + [(set i32:$rD, (PPClarx xoaddr:$src))]>; let Defs = [CR0] in def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst), "stwcx. $rS, $dst", LdStSTWCX, - [(PPCstcx GPRC:$rS, xoaddr:$dst)]>, + [(PPCstcx i32:$rS, xoaddr:$dst)]>, isDOT; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in @@ -666,94 +736,94 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src), "lbz $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>; + [(set i32:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, - [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>, + [(set i32:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src), "lhz $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>; + [(set i32:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), "lwz $rD, $src", LdStLoad, - [(set GPRC:$rD, (load iaddr:$src))]>; + [(set i32:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), "lfs $rD, $src", LdStLFD, - [(set F4RC:$rD, (load iaddr:$src))]>; + [(set f32:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), "lfd $rD, $src", LdStLFD, - [(set F8RC:$rD, (load iaddr:$src))]>; + [(set f64:$rD, (load iaddr:$src))]>; // Unindexed (r+i) Loads with Update (preinc). 
let mayLoad = 1 in { -def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // Indexed (r+r) Loads with Update (preinc). 
-def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), +def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), +def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), +def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfsux $rD, $addr", LdStLFDU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), +def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfdux $rD, $addr", LdStLFDU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } } @@ -763,32 +833,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, 
PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src), "lbzx $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>; + [(set i32:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, - [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>, + [(set i32:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src), "lhzx $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>; + [(set i32:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), "lwzx $rD, $src", LdStLoad, - [(set GPRC:$rD, (load xaddr:$src))]>; + [(set i32:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), "lhbrx $rD, $src", LdStLoad, - [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>; + [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), "lwbrx $rD, $src", LdStLoad, - [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; + [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), "lfsx $frD, $src", LdStLFD, - [(set F4RC:$frD, (load xaddr:$src))]>; + [(set f32:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), "lfdx $frD, $src", LdStLFD, - [(set F8RC:$frD, (load xaddr:$src))]>; + [(set f64:$frD, (load xaddr:$src))]>; + +def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src), + "lfiwax $frD, $src", LdStLFD, + [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; +def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src), + "lfiwzx $frD, $src", LdStLFD, + [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } //===----------------------------------------------------------------------===// @@ -799,137 +876,128 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), let PPC970_Unit = 2 in { def STB : 
DForm_1<38, (outs), (ins GPRC:$rS, memri:$src), "stb $rS, $src", LdStStore, - [(truncstorei8 GPRC:$rS, iaddr:$src)]>; + [(truncstorei8 i32:$rS, iaddr:$src)]>; def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src), "sth $rS, $src", LdStStore, - [(truncstorei16 GPRC:$rS, iaddr:$src)]>; + [(truncstorei16 i32:$rS, iaddr:$src)]>; def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), "stw $rS, $src", LdStStore, - [(store GPRC:$rS, iaddr:$src)]>; + [(store i32:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), "stfs $rS, $dst", LdStSTFD, - [(store F4RC:$rS, iaddr:$dst)]>; + [(store f32:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), "stfd $rS, $dst", LdStSTFD, - [(store F8RC:$rS, iaddr:$dst)]>; + [(store f64:$rS, iaddr:$dst)]>; } // Unindexed (r+i) Stores with Update (preinc). -let PPC970_Unit = 2 in { -def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - 
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +let PPC970_Unit = 2, mayStore = 1 in { +def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst), + "stfsu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst), + "stfdu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. 
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STBU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STHU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STWU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STFSU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STFDU $rS, iaddroff:$ptroff, $ptrreg)>; // Indexed (r+r) Stores. -// let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, - [(truncstorei8 GPRC:$rS, xaddr:$dst)]>, + [(truncstorei8 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst), "sthx $rS, $dst", LdStStore, - [(truncstorei16 GPRC:$rS, xaddr:$dst)]>, + [(truncstorei16 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, - [(store GPRC:$rS, xaddr:$dst)]>, - PPC970_DGroup_Cracked; - -def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + [(store i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set 
ptr_rc:$ea_res, - (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), - (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), - (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, - [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, + [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), "stwbrx $rS, $dst", LdStStore, - [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>, + [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), "stfiwx $frS, $dst", LdStSTFD, - [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>; + [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), "stfsx $frS, $dst", LdStSTFD, - [(store F4RC:$frS, xaddr:$dst)]>; + [(store f32:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), "stfdx $frS, $dst", LdStSTFD, - [(store F8RC:$frS, xaddr:$dst)]>; + [(store f64:$frS, xaddr:$dst)]>; +} + +// Indexed (r+r) Stores with Update (preinc). 
+let PPC970_Unit = 2, mayStore = 1 in { +def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst), + "stfsux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst), + "stfdux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; } +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. 
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STBUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STHUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STWUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STFSUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STFDUX $rS, $ptrreg, $ptroff)>; + def SYNC : XForm_24_sync<31, 598, (outs), (ins), "sync", LdStSync, [(int_ppc_sync)]>; @@ -939,68 +1007,66 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins), // let PPC970_Unit = 1 in { // FXU Operations. -def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), - "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; -def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm), +def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; + [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>; let Defs = [CARRY] in { def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IntGeneral, - [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>, + [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>, PPC970_DGroup_Cracked; def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic. 
$rD, $rA, $imm", IntGeneral, []>; } -def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm), +def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>; -def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym), + [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; +let isCodeGenOnly = 1 in +def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym), "la $rD, $sym($rA)", IntGeneral, - [(set GPRC:$rD, (add GPRC:$rA, + [(set i32:$rD, (add i32:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IntMulLI, - [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>; + [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>; let Defs = [CARRY] in { def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "subfic $rD, $rA, $imm", IntGeneral, - [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>; + [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>; } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), "li $rD, $imm", IntSimple, - [(set GPRC:$rD, immSExt16:$imm)]>; + [(set i32:$rD, immSExt16:$imm)]>; def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm), "lis $rD, $imm", IntSimple, - [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>; + [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } } let PPC970_Unit = 1 in { // FXU Operations. def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, - [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>, + [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, isDOT; def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "andis. 
$dst, $src1, $src2", IntGeneral, - [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>, + [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, isDOT; def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>; + [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>; def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>; + [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>; def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>; + [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>; def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>; + [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple, []>; def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm), @@ -1013,38 +1079,38 @@ def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2), let PPC970_Unit = 1 in { // FXU Operations. 
def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "nand $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>; + [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "and $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (and i32:$rS, i32:$rB))]>; def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "andc $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>; + [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "or $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (or i32:$rS, i32:$rB))]>; def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "nor $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>; + [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "orc $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>; + [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "eqv $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>; + [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "xor $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "slw $rA, $rS, $rB", IntGeneral, - [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "srw $rA, $rS, $rB", IntGeneral, - [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; 
let Defs = [CARRY] in { def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "sraw $rA, $rS, $rB", IntShift, - [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; } } @@ -1052,17 +1118,17 @@ let PPC970_Unit = 1 in { // FXU Operations. let Defs = [CARRY] in { def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH), "srawi $rA, $rS, $SH", IntShift, - [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>; + [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; } def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS), "cntlzw $rA, $rS", IntGeneral, - [(set GPRC:$rA, (ctlz GPRC:$rS))]>; + [(set i32:$rA, (ctlz i32:$rS))]>; def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS), "extsb $rA, $rS", IntSimple, - [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>; + [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS), "extsh $rA, $rS", IntSimple, - [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>; + [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB), "cmpw $crD, $rA, $rB", IntCompare>; @@ -1080,16 +1146,54 @@ def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB), let Uses = [RM] in { def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB), "fctiwz $frD, $frB", FPGeneral, - [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>; + [(set f64:$frD, (PPCfctiwz f64:$frB))]>; + def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB), "frsp $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fround F8RC:$frB))]>; + [(set f32:$frD, (fround f64:$frB))]>; + + // The frin -> nearbyint mapping is valid only in fast-math mode. 
+ def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB), + "frin $frD, $frB", FPGeneral, + [(set f64:$frD, (fnearbyint f64:$frB))]>; + def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB), + "frin $frD, $frB", FPGeneral, + [(set f32:$frD, (fnearbyint f32:$frB))]>; + + // These pseudos expand to rint but also set FE_INEXACT when the result does + // not equal the argument. + let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR! + def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB), + "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>; + def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB), + "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>; + } + + def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB), + "frip $frD, $frB", FPGeneral, + [(set f64:$frD, (fceil f64:$frB))]>; + def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB), + "frip $frD, $frB", FPGeneral, + [(set f32:$frD, (fceil f32:$frB))]>; + def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB), + "friz $frD, $frB", FPGeneral, + [(set f64:$frD, (ftrunc f64:$frB))]>; + def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB), + "friz $frD, $frB", FPGeneral, + [(set f32:$frD, (ftrunc f32:$frB))]>; + def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB), + "frim $frD, $frB", FPGeneral, + [(set f64:$frD, (ffloor f64:$frB))]>; + def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB), + "frim $frD, $frB", FPGeneral, + [(set f32:$frD, (ffloor f32:$frB))]>; + def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB), "fsqrt $frD, $frB", FPSqrt, - [(set F8RC:$frD, (fsqrt F8RC:$frB))]>; + [(set f64:$frD, (fsqrt f64:$frB))]>; def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB), "fsqrts $frD, $frB", FPSqrt, - [(set F4RC:$frD, (fsqrt F4RC:$frB))]>; + [(set f32:$frD, (fsqrt f32:$frB))]>; } } @@ -1099,31 +1203,44 @@ let Uses = [RM] in { /// sneak into a d-group with a store). 
def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), "fmr $frD, $frB", FPGeneral, - []>, // (set F4RC:$frD, F4RC:$frB) + []>, // (set f32:$frD, f32:$frB) PPC970_Unit_Pseudo; let PPC970_Unit = 3 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB), "fabs $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fabs F4RC:$frB))]>; + [(set f32:$frD, (fabs f32:$frB))]>; def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB), "fabs $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fabs F8RC:$frB))]>; + [(set f64:$frD, (fabs f64:$frB))]>; def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB), "fnabs $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>; + [(set f32:$frD, (fneg (fabs f32:$frB)))]>; def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB), "fnabs $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>; + [(set f64:$frD, (fneg (fabs f64:$frB)))]>; def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB), "fneg $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fneg F4RC:$frB))]>; + [(set f32:$frD, (fneg f32:$frB))]>; def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB), "fneg $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fneg F8RC:$frB))]>; + [(set f64:$frD, (fneg f64:$frB))]>; + +// Reciprocal estimates. +def FRE : XForm_26<63, 24, (outs F8RC:$frD), (ins F8RC:$frB), + "fre $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfre f64:$frB))]>; +def FRES : XForm_26<59, 24, (outs F4RC:$frD), (ins F4RC:$frB), + "fres $frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfre f32:$frB))]>; +def FRSQRTE : XForm_26<63, 26, (outs F8RC:$frD), (ins F8RC:$frB), + "frsqrte $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfrsqrte f64:$frB))]>; +def FRSQRTES : XForm_26<59, 26, (outs F4RC:$frD), (ins F4RC:$frB), + "frsqrtes $frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfrsqrte f32:$frB))]>; } - // XL-Form instructions. 
condition register logical ops. // @@ -1141,6 +1258,7 @@ def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD), "cror $CRD, $CRA, $CRB", BrCR, []>; +let isCodeGenOnly = 1 in { def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins), "creqv $dst, $dst, $dst", BrCR, []>; @@ -1158,6 +1276,7 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), "crxor 6, 6, 6", BrCR, [(PPCcr6unset)]>; } +} // XFX-Form instructions. Instructions that deal with SPRs. // @@ -1166,7 +1285,7 @@ def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in { +let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in { def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; @@ -1193,6 +1312,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; +let isCodeGenOnly = 1 in { + def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, + (outs VRSAVERC:$reg), (ins GPRC:$rS), + "mtspr 256, $rS", IntGeneral>, + PPC970_DGroup_Single, PPC970_Unit_FXU; + def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), + (ins VRSAVERC:$reg), + "mfspr $rT, 256", IntGeneral>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, +// so we'll need to scavenge a register for it. +let mayStore = 1 in +def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), + "#SPILL_VRSAVE", []>; + +// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously +// spilled), so we'll need to scavenge a register for it. 
+let mayLoad = 1 in +def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), + "#RESTORE_VRSAVE", []>; + def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1207,6 +1349,7 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), // instruction to keep the register allocator from becoming confused. // // FIXME: Make this a real Pseudo instruction when the JIT switches to MC. +let isCodeGenOnly = 1 in def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "#MFCRpseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1219,38 +1362,29 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; -// Instructions to manipulate FPSCR. Only long double handling uses these. -// FPSCR is not modelled; we use the SDNode Flag to keep things in order. +// Pseudo instruction to perform FADD in round-to-zero mode. +let usesCustomInserter = 1, Uses = [RM] in { + def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "", + [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; +} +// The above pseudo gets expanded to make use of the following instructions +// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level. let Uses = [RM], Defs = [RM] in { def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), - "mtfsb0 $FM", IntMTFSB0, - [(PPCmtfsb0 (i32 imm:$FM))]>, + "mtfsb0 $FM", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), - "mtfsb1 $FM", IntMTFSB0, - [(PPCmtfsb1 (i32 imm:$FM))]>, + "mtfsb1 $FM", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; - // MTFSF does not actually produce an FP result. We pretend it copies - // input reg B to the output. If we didn't do this it would look like the - // instruction had no outputs (because we aren't modelling the FPSCR) and - // it would be deleted. 
- def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA), - (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB), - "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0, - [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM), - F8RC:$rT, F8RC:$FRB))]>, + def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT), + "mtfsf $FM, $rT", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } let Uses = [RM] in { def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins), "mffs $rT", IntMFFS, - [(set F8RC:$rT, (PPCmffs))]>, - PPC970_DGroup_Single, PPC970_Unit_FPU; - def FADDrtz: AForm_2<63, 21, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPAddSub, - [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>, + [(set f64:$rT, (PPCmffs))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1261,61 +1395,61 @@ let PPC970_Unit = 1 in { // FXU Operations. // def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "add $rT, $rA, $rB", IntSimple, - [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (add i32:$rA, i32:$rB))]>; let Defs = [CARRY] in { def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "addc $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>, + [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; } def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "divw $rT, $rA, $rB", IntDivW, - [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>, + [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "divwu $rT, $rA, $rB", IntDivW, - [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>, + [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "mulhw $rT, $rA, $rB", IntMulHW, - [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; def MULHWU : XOForm_1<31, 
11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "mulhwu $rT, $rA, $rB", IntMulHWU, - [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "mullw $rT, $rA, $rB", IntMulHW, - [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subf $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>; + [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; let Defs = [CARRY] in { def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subfc $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>, + [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, PPC970_DGroup_Cracked; } def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA), "neg $rT, $rA", IntSimple, - [(set GPRC:$rT, (ineg GPRC:$rA))]>; + [(set i32:$rT, (ineg i32:$rA))]>; let Uses = [CARRY], Defs = [CARRY] in { def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "adde $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, -1))]>; + [(set i32:$rT, (adde i32:$rA, -1))]>; def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addze $rT, $rA", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, 0))]>; + [(set i32:$rT, (adde i32:$rA, 0))]>; def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subfe $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>; + [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (sube -1, GPRC:$rA))]>; + [(set i32:$rT, (sube -1, i32:$rA))]>; def SUBFZE : 
XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfze $rT, $rA", IntGeneral, - [(set GPRC:$rT, (sube 0, GPRC:$rA))]>; + [(set i32:$rT, (sube 0, i32:$rA))]>; } } @@ -1327,43 +1461,41 @@ let Uses = [RM] in { def FMADD : AForm_1<63, 29, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, - (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>; + [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>; def FMADDS : AForm_1<59, 29, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, - (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>; + [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>; def FMSUB : AForm_1<63, 28, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, - (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>; + [(set f64:$FRT, + (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>; def FMSUBS : AForm_1<59, 28, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, - (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>; + [(set f32:$FRT, + (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>; def FNMADD : AForm_1<63, 31, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, - (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>; + [(set f64:$FRT, + (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>; def FNMADDS : AForm_1<59, 31, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, - (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>; + [(set f32:$FRT, + (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>; def FNMSUB : AForm_1<63, 30, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC, - (fneg F8RC:$FRB))))]>; + [(set f64:$FRT, (fneg 
(fma f64:$FRA, f64:$FRC, + (fneg f64:$FRB))))]>; def FNMSUBS : AForm_1<59, 30, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC, - (fneg F4RC:$FRB))))]>; + [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, + (fneg f32:$FRB))))]>; } // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid // having 4 of these, force the comparison to always be an 8-byte double (code @@ -1372,50 +1504,50 @@ let Uses = [RM] in { def FSELD : AForm_1<63, 23, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>; + [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; def FSELS : AForm_1<63, 23, (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>; + [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; let Uses = [RM] in { def FADD : AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fadd $FRT, $FRA, $FRB", FPAddSub, - [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>; + [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; def FADDS : AForm_2<59, 21, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), "fadds $FRT, $FRA, $FRB", FPGeneral, - [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>; + [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; def FDIV : AForm_2<63, 18, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fdiv $FRT, $FRA, $FRB", FPDivD, - [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>; + [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; def FDIVS : AForm_2<59, 18, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), "fdivs $FRT, $FRA, $FRB", FPDivS, - [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>; + [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; def FMUL : AForm_3<63, 25, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC), "fmul $FRT, $FRA, $FRC", FPFused, - 
[(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>; + [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; def FMULS : AForm_3<59, 25, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC), "fmuls $FRT, $FRA, $FRC", FPGeneral, - [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>; + [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; def FSUB : AForm_2<63, 20, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fsub $FRT, $FRA, $FRB", FPAddSub, - [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>; + [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; def FSUBS : AForm_2<59, 20, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), "fsubs $FRT, $FRA, $FRB", FPGeneral, - [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>; + [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; } } let PPC970_Unit = 1 in { // FXU Operations. def ISEL : AForm_4<31, 15, - (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), + (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } @@ -1455,47 +1587,43 @@ def : Pat<(i32 imm:$imm), (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; // Implement the 'not' operation with the NOR instruction. -def NOT : Pat<(not GPRC:$in), - (NOR GPRC:$in, GPRC:$in)>; +def NOT : Pat<(not i32:$in), + (NOR $in, $in)>; // ADD an arbitrary immediate. -def : Pat<(add GPRC:$in, imm:$imm), - (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>; +def : Pat<(add i32:$in, imm:$imm), + (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>; // OR an arbitrary immediate. -def : Pat<(or GPRC:$in, imm:$imm), - (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>; +def : Pat<(or i32:$in, imm:$imm), + (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; // XOR an arbitrary immediate. 
-def : Pat<(xor GPRC:$in, imm:$imm), - (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>; +def : Pat<(xor i32:$in, imm:$imm), + (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; // SUBFIC -def : Pat<(sub immSExt16:$imm, GPRC:$in), - (SUBFIC GPRC:$in, imm:$imm)>; +def : Pat<(sub immSExt16:$imm, i32:$in), + (SUBFIC $in, imm:$imm)>; // SHL/SRL -def : Pat<(shl GPRC:$in, (i32 imm:$imm)), - (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>; -def : Pat<(srl GPRC:$in, (i32 imm:$imm)), - (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>; +def : Pat<(shl i32:$in, (i32 imm:$imm)), + (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>; +def : Pat<(srl i32:$in, (i32 imm:$imm)), + (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>; // ROTL -def : Pat<(rotl GPRC:$in, GPRC:$sh), - (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>; -def : Pat<(rotl GPRC:$in, (i32 imm:$imm)), - (RLWINM GPRC:$in, imm:$imm, 0, 31)>; +def : Pat<(rotl i32:$in, i32:$sh), + (RLWNM $in, $sh, 0, 31)>; +def : Pat<(rotl i32:$in, (i32 imm:$imm)), + (RLWINM $in, imm:$imm, 0, 31)>; // RLWNM -def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm), - (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; +def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm), + (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; // Calls -def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)), - (BL_Darwin tglobaladdr:$dst)>; -def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)), - (BL_Darwin texternalsym:$dst)>; -def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)), - (BL_SVR4 tglobaladdr:$dst)>; -def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)), - (BL_SVR4 texternalsym:$dst)>; +def : Pat<(PPCcall (i32 tglobaladdr:$dst)), + (BL tglobaladdr:$dst)>; +def : Pat<(PPCcall (i32 texternalsym:$dst)), + (BL texternalsym:$dst)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), @@ -1518,28 +1646,28 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>; def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>; def : 
Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>; def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>; -def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in), - (ADDIS GPRC:$in, tglobaltlsaddr:$g)>; -def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in), - (ADDIL GPRC:$in, tglobaltlsaddr:$g)>; -def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)), - (ADDIS GPRC:$in, tglobaladdr:$g)>; -def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)), - (ADDIS GPRC:$in, tconstpool:$g)>; -def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)), - (ADDIS GPRC:$in, tjumptable:$g)>; -def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)), - (ADDIS GPRC:$in, tblockaddress:$g)>; +def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in), + (ADDIS $in, tglobaltlsaddr:$g)>; +def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in), + (ADDI $in, tglobaltlsaddr:$g)>; +def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS $in, tglobaladdr:$g)>; +def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)), + (ADDIS $in, tconstpool:$g)>; +def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)), + (ADDIS $in, tjumptable:$g)>; +def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)), + (ADDIS $in, tblockaddress:$g)>; // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. 
-def : Pat<(sra GPRC:$rS, GPRC:$rB), - (SRAW GPRC:$rS, GPRC:$rB)>; -def : Pat<(srl GPRC:$rS, GPRC:$rB), - (SRW GPRC:$rS, GPRC:$rB)>; -def : Pat<(shl GPRC:$rS, GPRC:$rB), - (SLW GPRC:$rS, GPRC:$rB)>; +def : Pat<(sra i32:$rS, i32:$rB), + (SRAW $rS, $rB)>; +def : Pat<(srl i32:$rS, i32:$rB), + (SRW $rS, $rB)>; +def : Pat<(shl i32:$rS, i32:$rB), + (SLW $rS, $rB)>; def : Pat<(zextloadi1 iaddr:$src), (LBZ iaddr:$src)>; @@ -1562,8 +1690,8 @@ def : Pat<(f64 (extloadf32 iaddr:$src)), def : Pat<(f64 (extloadf32 xaddr:$src)), (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>; -def : Pat<(f64 (fextend F4RC:$src)), - (COPY_TO_REGCLASS F4RC:$src, F8RC)>; +def : Pat<(f64 (fextend f32:$src)), + (COPY_TO_REGCLASS $src, F8RC)>; // Memory barriers def : Pat<(membarrier (i32 imm /*ll*/), @@ -1575,5 +1703,15 @@ def : Pat<(membarrier (i32 imm /*ll*/), def : Pat<(atomic_fence (imm), (imm)), (SYNC)>; +// Additional FNMSUB patterns: -a*c + b == -(a*c - b) +def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B), + (FNMSUB $A, $C, $B)>; +def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B), + (FNMSUB $A, $C, $B)>; +def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B), + (FNMSUBS $A, $C, $B)>; +def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B), + (FNMSUBS $A, $C, $B)>; + include "PPCInstrAltivec.td" include "PPCInstr64Bit.td" diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp index aba2739..cfcd749 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp @@ -15,10 +15,10 @@ #include "PPCJITInfo.h" #include "PPCRelocations.h" #include "PPCTargetMachine.h" -#include "llvm/Function.h" -#include "llvm/Support/Memory.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -115,7 +115,7 @@ asm( "lwz r2, 208(r1)\n" // stub's frame "lwz r4, 8(r2)\n" // stub's lr "li r5, 0\n" // 
0 == 32 bit - "bl _PPCCompilationCallbackC\n" + "bl _LLVMPPCCompilationCallback\n" "mtctr r3\n" // Restore all int arg registers "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n" @@ -178,7 +178,7 @@ asm( "lwz 5, 104(1)\n" // stub's frame "lwz 4, 4(5)\n" // stub's lr "li 5, 0\n" // 0 == 32 bit - "bl PPCCompilationCallbackC\n" + "bl LLVMPPCCompilationCallback\n" "mtctr 3\n" // Restore all int arg registers "lwz 10, 100(1)\n" "lwz 9, 96(1)\n" @@ -259,10 +259,10 @@ asm( "ld 4, 16(5)\n" // stub's lr "li 5, 1\n" // 1 == 64 bit #ifdef __ELF__ - "bl PPCCompilationCallbackC\n" + "bl LLVMPPCCompilationCallback\n" "nop\n" #else - "bl _PPCCompilationCallbackC\n" + "bl _LLVMPPCCompilationCallback\n" #endif "mtctr 3\n" // Restore all int arg registers @@ -292,9 +292,10 @@ void PPC64CompilationCallback() { #endif extern "C" { -static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, - unsigned *OrigCallAddrPlus4, - bool is64Bit) { +LLVM_LIBRARY_VISIBILITY void * +LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub // emitted by emitFunctionStub, rather than the instruction after it. 
unsigned *StubCallAddr = StubCallAddrPlus4 - 1; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h index 2f8243a..46d4a08 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h @@ -14,8 +14,8 @@ #ifndef POWERPC_JITINFO_H #define POWERPC_JITINFO_H -#include "llvm/Target/TargetJITInfo.h" #include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/Target/TargetJITInfo.h" namespace llvm { class PPCTargetMachine; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 19ec993..9b0df3e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #include "PPC.h" +#include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/Mangler.h" -#include "llvm/ADT/SmallString.h" using namespace llvm; static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { @@ -114,6 +115,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, break; case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO; break; + case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO; + break; + case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO; + break; + case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO; + break; } // FIXME: This isn't right, but we don't have a good way to express this in diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 
24caffa..ee18ead 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -37,9 +37,19 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// PEI. bool MustSaveLR; + /// Does this function have any stack spills. + bool HasSpills; + + /// Does this function spill using instructions with only r+r (not r+i) + /// forms. + bool HasNonRISpills; + /// SpillsCR - Indicates whether CR is spilled in the current function. bool SpillsCR; + /// Indicates whether VRSAVE is spilled in the current function. + bool SpillsVRSAVE; + /// LRStoreRequired - The bool indicates whether there is some explicit use of /// the LR/LR8 stack slot that is not obvious from scanning the code. This /// requires that the code generator produce a store of LR to the stack on @@ -71,11 +81,17 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// register for parameter passing. unsigned VarArgsNumFPR; + /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4. 
+ int CRSpillFrameIndex; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), + HasSpills(false), + HasNonRISpills(false), SpillsCR(false), + SpillsVRSAVE(false), LRStoreRequired(false), MinReservedArea(0), TailCallSPDelta(0), @@ -83,7 +99,8 @@ public: VarArgsFrameIndex(0), VarArgsStackOffset(0), VarArgsNumGPR(0), - VarArgsNumFPR(0) {} + VarArgsNumFPR(0), + CRSpillFrameIndex(0) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -105,9 +122,18 @@ public: void setMustSaveLR(bool U) { MustSaveLR = U; } bool mustSaveLR() const { return MustSaveLR; } + void setHasSpills() { HasSpills = true; } + bool hasSpills() const { return HasSpills; } + + void setHasNonRISpills() { HasNonRISpills = true; } + bool hasNonRISpills() const { return HasNonRISpills; } + void setSpillsCR() { SpillsCR = true; } bool isCRSpilled() const { return SpillsCR; } + void setSpillsVRSAVE() { SpillsVRSAVE = true; } + bool isVRSAVESpilled() const { return SpillsVRSAVE; } + void setLRStoreRequired() { LRStoreRequired = true; } bool isLRStoreRequired() const { return LRStoreRequired; } @@ -125,6 +151,9 @@ public: unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } + + int getCRSpillFrameIndex() const { return CRSpillFrameIndex; } + void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; } }; } // end of namespace llvm diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 459c358..1d61a3a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -15,63 +15,45 @@ #define DEBUG_TYPE "reginfo" #include "PPCRegisterInfo.h" #include "PPC.h" +#include "PPCFrameLowering.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" -#include 
"PPCFrameLowering.h" #include "PPCSubtarget.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include <cstdlib> #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -namespace llvm { -cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger", - cl::init(false), - cl::desc("Disable PPC32 register scavenger"), - cl::Hidden); -cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger", - cl::init(false), - cl::desc("Disable PPC64 register scavenger"), - cl::Hidden); -} - using namespace llvm; -// FIXME (64-bit): Should be inlined. 
-bool -PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return ((!DisablePPC32RS && !Subtarget.isPPC64()) || - (!DisablePPC64RS && Subtarget.isPPC64())); -} - PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, ST.isPPC64() ? 0 : 1, ST.isPPC64() ? 0 : 1), - Subtarget(ST), TII(tii), CRSpillFrameIdx(0) { + Subtarget(ST), TII(tii) { ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX; ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX; ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX; @@ -86,20 +68,20 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8; ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8; ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX; - ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32; + ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; } -bool -PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return requiresRegisterScavenging(MF); -} - - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + if (Kind == 1) { + if (Subtarget.isPPC64()) + return &PPC::G8RC_NOX0RegClass; + return &PPC::GPRC_NOR0RegClass; + } + if (Subtarget.isPPC64()) return &PPC::G8RCRegClass; return &PPC::GPRCRegClass; @@ -111,11 +93,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : CSR_Darwin32_SaveList; - // For 32-bit SVR4, also initialize the frame index associated with - // the CR spill slot. - if (!Subtarget.isPPC64()) - CRSpillFrameIdx = 0; - return Subtarget.isPPC64() ? 
CSR_SVR464_SaveList : CSR_SVR432_SaveList; } @@ -128,12 +105,35 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask; } +const uint32_t* +PPCRegisterInfo::getNoPreservedMask() const { + // The naming here is inverted: The CSR_NoRegs_Altivec has the + // Altivec registers masked so that they're not saved and restored around + // instructions with this preserved mask. + + if (!Subtarget.hasAltivec()) + return CSR_NoRegs_Altivec_RegMask; + + if (Subtarget.isDarwin()) + return CSR_NoRegs_Darwin_RegMask; + return CSR_NoRegs_RegMask; +} + BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); - Reserved.set(PPC::R0); + // The ZERO register is not really a register, but the representation of r0 + // when used in instructions that treat r0 as the constant 0. + Reserved.set(PPC::ZERO); + Reserved.set(PPC::ZERO8); + + // The FP register is also not really a register, but is the representation + // of the frame pointer register used by ISD::FRAMEADDR. + Reserved.set(PPC::FP); + Reserved.set(PPC::FP8); + Reserved.set(PPC::R1); Reserved.set(PPC::LR); Reserved.set(PPC::LR8); @@ -144,35 +144,21 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } - // Reserve R2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::R2); - } // On PPC64, r13 is the thread pointer. Never allocate this register. - // Note that this is over conservative, as it also prevents allocation of R31 - // when the FP is not needed. 
if (Subtarget.isPPC64()) { Reserved.set(PPC::R13); - Reserved.set(PPC::R31); - Reserved.set(PPC::X0); Reserved.set(PPC::X1); Reserved.set(PPC::X13); - Reserved.set(PPC::X31); + + if (PPCFI->needsFP(MF)) + Reserved.set(PPC::X31); // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } - // Reserve X2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::X2); - } } if (PPCFI->needsFP(MF)) @@ -190,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, switch (RC->getID()) { default: return 0; + case PPC::G8RC_NOX0RegClassID: + case PPC::GPRC_NOR0RegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -204,77 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -bool -PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { - switch (RC->getID()) { - case PPC::G8RCRegClassID: - case PPC::GPRCRegClassID: - case PPC::F8RCRegClassID: - case PPC::F4RCRegClassID: - case PPC::VRRCRegClassID: - return true; - default: - return false; - } -} - //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// -void PPCRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - if (MF.getTarget().Options.GuaranteedTailCallOpt && - I->getOpcode() == PPC::ADJCALLSTACKUP) { - // Add (actually subtract) back the amount the callee popped on return. - if (int CalleeAmt = I->getOperand(1).getImm()) { - bool is64Bit = Subtarget.isPPC64(); - CalleeAmt *= -1; - unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; - unsigned TmpReg = is64Bit ? 
PPC::X0 : PPC::R0; - unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; - unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; - unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; - MachineInstr *MI = I; - DebugLoc dl = MI->getDebugLoc(); - - if (isInt<16>(CalleeAmt)) { - BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) - .addReg(StackReg, RegState::Kill) - .addImm(CalleeAmt); - } else { - MachineBasicBlock::iterator MBBI = I; - BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) - .addImm(CalleeAmt >> 16); - BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) - .addReg(TmpReg, RegState::Kill) - .addImm(CalleeAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) - .addReg(StackReg, RegState::Kill) - .addReg(TmpReg); - } - } - } - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - -/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered -/// register first and then a spilled callee-saved register if that fails. -static -unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, - const TargetRegisterClass *RC, int SPAdj) { - assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC); - // FIXME: move ARM callee-saved reg scan to target independent code, then - // search for already spilled CS register here. - if (Reg == 0) - Reg = RS->scavengeRegister(RC, II, SPAdj); - return Reg; -} - /// lowerDynamicAlloc - Generate the code for allocating an object in the /// current frame. 
The sequence of code with be in the general form /// @@ -282,8 +203,7 @@ unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, /// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size /// addi Rnew, SP, \#maxCalFrameSize ; get the top of the allocation /// -void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Get the instruction. MachineInstr &MI = *II; // Get the instruction's basic block. @@ -315,28 +235,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *RC = LP64 ? G8RC : GPRC; - - // FIXME (64-bit): Use "findScratchRegister" - unsigned Reg; - if (requiresRegisterScavenging(MF)) - Reg = findScratchRegister(II, RS, RC, SPAdj); - else - Reg = PPC::R0; + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) .addReg(PPC::R31) .addImm(FrameSize); } else if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) - .addImm(0) - .addReg(PPC::X1); - else - BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0) - .addImm(0) - .addReg(PPC::X1); + BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) + .addImm(0) + .addReg(PPC::X1); } else { BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg) .addImm(0) @@ -346,17 +254,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. 
- BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(Reg, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - else - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) + .addReg(Reg, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) .addReg(PPC::X1) @@ -398,23 +299,19 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, /// stw rA, FI ; Store rA to the frame. /// void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, - unsigned FrameIndex, int SPAdj, - RegScavenger *RS) const { + unsigned FrameIndex) const { // Get the instruction. MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset> // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); DebugLoc dl = MI.getDebugLoc(); - // FIXME: Once LLVM supports creating virtual registers here, or the register - // scavenger can return multiple registers, stop using reserved registers - // here. - (void) SPAdj; - (void) RS; - bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue @@ -424,16 +321,20 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // If the saved register wasn't CR0, shift the bits left so that they are in // CR0's slot. 
- if (SrcReg != PPC::CR0) + if (SrcReg != PPC::CR0) { + unsigned Reg1 = Reg; + Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + // rlwinm rA, rA, ShiftBits, 0, 31. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) - .addReg(Reg, RegState::Kill) - .addImm(getPPCRegisterNumbering(SrcReg) * 4) + .addReg(Reg1, RegState::Kill) + .addImm(getEncodingValue(SrcReg) * 4) .addImm(0) .addImm(31); + } addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW)) - .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())), + .addReg(Reg, RegState::Kill), FrameIndex); // Discard the pseudo instruction. @@ -441,23 +342,19 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, } void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, - unsigned FrameIndex, int SPAdj, - RegScavenger *RS) const { + unsigned FrameIndex) const { // Get the instruction. MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset> // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); DebugLoc dl = MI.getDebugLoc(); - // FIXME: Once LLVM supports creating virtual registers here, or the register - // scavenger can return multiple registers, stop using reserved registers - // here. - (void) SPAdj; - (void) RS; - bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); unsigned DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CR does not define its destination"); @@ -468,15 +365,67 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. 
if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; + unsigned Reg1 = Reg; + Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + + unsigned ShiftBits = getEncodingValue(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) - .addReg(Reg).addImm(32-ShiftBits).addImm(0) + .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0) .addImm(31); } BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg) - .addReg(Reg); + .addReg(Reg, RegState::Kill); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // Get the instruction. + MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); + unsigned SrcReg = MI.getOperand(0).getReg(); + + BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW)) + .addReg(Reg, RegState::Kill), + FrameIndex); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // Get the instruction. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset> + // Get the instruction's basic block. 
+ MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_VRSAVE does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ), + Reg), FrameIndex); + + BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg) + .addReg(Reg, RegState::Kill); // Discard the pseudo instruction. MBB.erase(II); @@ -489,18 +438,14 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 // ABI, return true to prevent allocating an additional frame slot. // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0 - // is arbitrary and will be subsequently ignored. For 32-bit, we must - // create exactly one stack slot and return its FrameIdx for all - // nonvolatiles. + // is arbitrary and will be subsequently ignored. For 32-bit, we have + // previously created the stack slot if needed, so return its FrameIdx. 
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) { - if (Subtarget.isPPC64()) { + if (Subtarget.isPPC64()) FrameIdx = 0; - } else if (CRSpillFrameIdx) { - FrameIdx = CRSpillFrameIdx; - } else { - MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo(); - FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); - CRSpillFrameIdx = FrameIdx; + else { + const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + FrameIdx = FI->getCRSpillFrameIndex(); } return true; } @@ -509,7 +454,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); // Get the instruction. @@ -523,20 +469,13 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc dl = MI.getDebugLoc(); - // Find out which operand is the frame index. - unsigned FIOperandNo = 0; - while (!MI.getOperand(FIOperandNo).isFI()) { - ++FIOperandNo; - assert(FIOperandNo != MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } // Take into account whether it's an add or mem instruction - unsigned OffsetOperandNo = (FIOperandNo == 2) ? 1 : 2; + unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2; if (MI.isInlineAsm()) - OffsetOperandNo = FIOperandNo-1; + OffsetOperandNo = FIOperandNum-1; // Get the frame index. - int FrameIndex = MI.getOperand(FIOperandNo).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Get the frame pointer save index. Users of this index are primarily // DYNALLOC instructions. @@ -548,25 +487,29 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Special case for dynamic alloca. 
if (FPSI && FrameIndex == FPSI && (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { - lowerDynamicAlloc(II, SPAdj, RS); + lowerDynamicAlloc(II); return; } - // Special case for pseudo-ops SPILL_CR and RESTORE_CR. - if (requiresRegisterScavenging(MF)) { - if (OpC == PPC::SPILL_CR) { - lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return; - } else if (OpC == PPC::RESTORE_CR) { - lowerCRRestore(II, FrameIndex, SPAdj, RS); - return; - } + // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc. + if (OpC == PPC::SPILL_CR) { + lowerCRSpilling(II, FrameIndex); + return; + } else if (OpC == PPC::RESTORE_CR) { + lowerCRRestore(II, FrameIndex); + return; + } else if (OpC == PPC::SPILL_VRSAVE) { + lowerVRSAVESpilling(II, FrameIndex); + return; + } else if (OpC == PPC::RESTORE_VRSAVE) { + lowerVRSAVERestore(II, FrameIndex); + return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). bool is64Bit = Subtarget.isPPC64(); - MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ? + MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ? (is64Bit ? PPC::X31 : PPC::R31) : (is64Bit ? PPC::X1 : PPC::R1), false); @@ -579,11 +522,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case PPC::LWA: case PPC::LD: case PPC::STD: - case PPC::STD_32: isIXAddr = true; break; } - + + // If the instruction is not present in ImmToIdxMap, then it has no immediate + // form (and must be r+r). + bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC); + // Now add the frame object offset to the offset from r1. int Offset = MFI->getObjectOffset(FrameIndex); if (!isIXAddr) @@ -596,7 +542,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // to Offset to get the correct offset. // Naked functions have stack size 0, although getStackSize may not reflect that // because we didn't call all the pieces that compute it for naked functions. 
- if (!MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) + if (!MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) Offset += MFI->getStackSize(); // If we can, encode the offset directly into the instruction. If this is a @@ -606,7 +553,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm - (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { + (!noImmForm && + isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -616,19 +564,17 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - unsigned SReg; - if (requiresRegisterScavenging(MF)) { - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj); - } else - SReg = is64Bit ? PPC::X0 : PPC::R0; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC; + unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC), + SReg = MF.getRegInfo().createVirtualRegister(RC); // Insert a set of rA with the full offset value before the ld, st, or add - BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi) .addImm(Offset >> 16); BuildMI(MBB, II, dl, TII.get(is64Bit ? 
PPC::ORI8 : PPC::ORI), SReg) - .addReg(SReg, RegState::Kill) + .addReg(SRegHi, RegState::Kill) .addImm(Offset); // Convert into indexed form of the instruction: @@ -637,7 +583,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; - if (OpC != TargetOpcode::INLINEASM) { + if (noImmForm) + OperandBase = 1; + else if (OpC != TargetOpcode::INLINEASM) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; @@ -647,7 +595,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, OperandBase = OffsetOperandNo; } - unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); + unsigned StackReg = MI.getOperand(FIOperandNum).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index a8fd796..7e6683e 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -15,8 +15,8 @@ #ifndef POWERPC32_REGISTERINFO_H #define POWERPC32_REGISTERINFO_H +#include "llvm/ADT/DenseMap.h" #include "PPC.h" -#include <map> #define GET_REGINFO_HEADER #include "PPCGenRegisterInfo.inc" @@ -27,10 +27,9 @@ class TargetInstrInfo; class Type; class PPCRegisterInfo : public PPCGenRegisterInfo { - std::map<unsigned, unsigned> ImmToIdxMap; + DenseMap<unsigned, unsigned> ImmToIdxMap; const PPCSubtarget &Subtarget; const TargetInstrInfo &TII; - mutable int CRSpillFrameIdx; public: PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii); @@ -45,31 +44,38 @@ public: /// Code Generation virtual methods... 
const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; + const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; - virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + /// We require the register scavenger. + bool requiresRegisterScavenging(const MachineFunction &MF) const { + return true; + } + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + return true; + } + + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; + } + + void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; + void lowerCRSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerCRRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerVRSAVESpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerVRSAVERestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; - /// requiresRegisterScavenging - We require a register scavenger. - /// FIXME (64-bit): Should be inlined. 
- bool requiresRegisterScavenging(const MachineFunction &MF) const; - - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - void lowerDynamicAlloc(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const; - void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, - int SPAdj, RegScavenger *RS) const; - void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, - int SPAdj, RegScavenger *RS) const; bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 5ca3876..57a25f5 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -27,178 +27,72 @@ class PPCReg<string n> : Register<n> { // GPR - One of the 32 32-bit general-purpose registers class GPR<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // GP8 - One of the 32 64-bit general-purpose registers class GP8<GPR SubReg, string n> : PPCReg<n> { - field bits<5> Num = SubReg.Num; + let HWEncoding = SubReg.HWEncoding; let SubRegs = [SubReg]; let SubRegIndices = [sub_32]; } // SPR - One of the 32-bit special-purpose registers class SPR<bits<10> num, string n> : PPCReg<n> { - field bits<10> Num = num; + let HWEncoding{9-0} = num; } // FPR - One of the 32 64-bit floating-point registers class FPR<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // VR - One of the 32 
128-bit vector registers class VR<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // CR - One of the 8 4-bit condition registers class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { - field bits<3> Num = num; + let HWEncoding{2-0} = num; let SubRegs = subregs; } // CRBIT - One of the 32 1-bit condition register fields class CRBIT<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } - // General-purpose registers -def R0 : GPR< 0, "r0">, DwarfRegNum<[-2, 0]>; -def R1 : GPR< 1, "r1">, DwarfRegNum<[-2, 1]>; -def R2 : GPR< 2, "r2">, DwarfRegNum<[-2, 2]>; -def R3 : GPR< 3, "r3">, DwarfRegNum<[-2, 3]>; -def R4 : GPR< 4, "r4">, DwarfRegNum<[-2, 4]>; -def R5 : GPR< 5, "r5">, DwarfRegNum<[-2, 5]>; -def R6 : GPR< 6, "r6">, DwarfRegNum<[-2, 6]>; -def R7 : GPR< 7, "r7">, DwarfRegNum<[-2, 7]>; -def R8 : GPR< 8, "r8">, DwarfRegNum<[-2, 8]>; -def R9 : GPR< 9, "r9">, DwarfRegNum<[-2, 9]>; -def R10 : GPR<10, "r10">, DwarfRegNum<[-2, 10]>; -def R11 : GPR<11, "r11">, DwarfRegNum<[-2, 11]>; -def R12 : GPR<12, "r12">, DwarfRegNum<[-2, 12]>; -def R13 : GPR<13, "r13">, DwarfRegNum<[-2, 13]>; -def R14 : GPR<14, "r14">, DwarfRegNum<[-2, 14]>; -def R15 : GPR<15, "r15">, DwarfRegNum<[-2, 15]>; -def R16 : GPR<16, "r16">, DwarfRegNum<[-2, 16]>; -def R17 : GPR<17, "r17">, DwarfRegNum<[-2, 17]>; -def R18 : GPR<18, "r18">, DwarfRegNum<[-2, 18]>; -def R19 : GPR<19, "r19">, DwarfRegNum<[-2, 19]>; -def R20 : GPR<20, "r20">, DwarfRegNum<[-2, 20]>; -def R21 : GPR<21, "r21">, DwarfRegNum<[-2, 21]>; -def R22 : GPR<22, "r22">, DwarfRegNum<[-2, 22]>; -def R23 : GPR<23, "r23">, DwarfRegNum<[-2, 23]>; -def R24 : GPR<24, "r24">, DwarfRegNum<[-2, 24]>; -def R25 : GPR<25, "r25">, DwarfRegNum<[-2, 25]>; -def R26 : GPR<26, "r26">, DwarfRegNum<[-2, 26]>; -def R27 : GPR<27, "r27">, DwarfRegNum<[-2, 27]>; -def R28 : GPR<28, "r28">, DwarfRegNum<[-2, 28]>; -def R29 : GPR<29, "r29">, DwarfRegNum<[-2, 29]>; -def 
R30 : GPR<30, "r30">, DwarfRegNum<[-2, 30]>; -def R31 : GPR<31, "r31">, DwarfRegNum<[-2, 31]>; +foreach Index = 0-31 in { + def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>; +} // 64-bit General-purpose registers -def X0 : GP8< R0, "r0">, DwarfRegNum<[0, -2]>; -def X1 : GP8< R1, "r1">, DwarfRegNum<[1, -2]>; -def X2 : GP8< R2, "r2">, DwarfRegNum<[2, -2]>; -def X3 : GP8< R3, "r3">, DwarfRegNum<[3, -2]>; -def X4 : GP8< R4, "r4">, DwarfRegNum<[4, -2]>; -def X5 : GP8< R5, "r5">, DwarfRegNum<[5, -2]>; -def X6 : GP8< R6, "r6">, DwarfRegNum<[6, -2]>; -def X7 : GP8< R7, "r7">, DwarfRegNum<[7, -2]>; -def X8 : GP8< R8, "r8">, DwarfRegNum<[8, -2]>; -def X9 : GP8< R9, "r9">, DwarfRegNum<[9, -2]>; -def X10 : GP8<R10, "r10">, DwarfRegNum<[10, -2]>; -def X11 : GP8<R11, "r11">, DwarfRegNum<[11, -2]>; -def X12 : GP8<R12, "r12">, DwarfRegNum<[12, -2]>; -def X13 : GP8<R13, "r13">, DwarfRegNum<[13, -2]>; -def X14 : GP8<R14, "r14">, DwarfRegNum<[14, -2]>; -def X15 : GP8<R15, "r15">, DwarfRegNum<[15, -2]>; -def X16 : GP8<R16, "r16">, DwarfRegNum<[16, -2]>; -def X17 : GP8<R17, "r17">, DwarfRegNum<[17, -2]>; -def X18 : GP8<R18, "r18">, DwarfRegNum<[18, -2]>; -def X19 : GP8<R19, "r19">, DwarfRegNum<[19, -2]>; -def X20 : GP8<R20, "r20">, DwarfRegNum<[20, -2]>; -def X21 : GP8<R21, "r21">, DwarfRegNum<[21, -2]>; -def X22 : GP8<R22, "r22">, DwarfRegNum<[22, -2]>; -def X23 : GP8<R23, "r23">, DwarfRegNum<[23, -2]>; -def X24 : GP8<R24, "r24">, DwarfRegNum<[24, -2]>; -def X25 : GP8<R25, "r25">, DwarfRegNum<[25, -2]>; -def X26 : GP8<R26, "r26">, DwarfRegNum<[26, -2]>; -def X27 : GP8<R27, "r27">, DwarfRegNum<[27, -2]>; -def X28 : GP8<R28, "r28">, DwarfRegNum<[28, -2]>; -def X29 : GP8<R29, "r29">, DwarfRegNum<[29, -2]>; -def X30 : GP8<R30, "r30">, DwarfRegNum<[30, -2]>; -def X31 : GP8<R31, "r31">, DwarfRegNum<[31, -2]>; +foreach Index = 0-31 in { + def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>, + DwarfRegNum<[Index, -2]>; +} // Floating-point registers -def F0 : FPR< 0, "f0">, 
DwarfRegNum<[32, 32]>; -def F1 : FPR< 1, "f1">, DwarfRegNum<[33, 33]>; -def F2 : FPR< 2, "f2">, DwarfRegNum<[34, 34]>; -def F3 : FPR< 3, "f3">, DwarfRegNum<[35, 35]>; -def F4 : FPR< 4, "f4">, DwarfRegNum<[36, 36]>; -def F5 : FPR< 5, "f5">, DwarfRegNum<[37, 37]>; -def F6 : FPR< 6, "f6">, DwarfRegNum<[38, 38]>; -def F7 : FPR< 7, "f7">, DwarfRegNum<[39, 39]>; -def F8 : FPR< 8, "f8">, DwarfRegNum<[40, 40]>; -def F9 : FPR< 9, "f9">, DwarfRegNum<[41, 41]>; -def F10 : FPR<10, "f10">, DwarfRegNum<[42, 42]>; -def F11 : FPR<11, "f11">, DwarfRegNum<[43, 43]>; -def F12 : FPR<12, "f12">, DwarfRegNum<[44, 44]>; -def F13 : FPR<13, "f13">, DwarfRegNum<[45, 45]>; -def F14 : FPR<14, "f14">, DwarfRegNum<[46, 46]>; -def F15 : FPR<15, "f15">, DwarfRegNum<[47, 47]>; -def F16 : FPR<16, "f16">, DwarfRegNum<[48, 48]>; -def F17 : FPR<17, "f17">, DwarfRegNum<[49, 49]>; -def F18 : FPR<18, "f18">, DwarfRegNum<[50, 50]>; -def F19 : FPR<19, "f19">, DwarfRegNum<[51, 51]>; -def F20 : FPR<20, "f20">, DwarfRegNum<[52, 52]>; -def F21 : FPR<21, "f21">, DwarfRegNum<[53, 53]>; -def F22 : FPR<22, "f22">, DwarfRegNum<[54, 54]>; -def F23 : FPR<23, "f23">, DwarfRegNum<[55, 55]>; -def F24 : FPR<24, "f24">, DwarfRegNum<[56, 56]>; -def F25 : FPR<25, "f25">, DwarfRegNum<[57, 57]>; -def F26 : FPR<26, "f26">, DwarfRegNum<[58, 58]>; -def F27 : FPR<27, "f27">, DwarfRegNum<[59, 59]>; -def F28 : FPR<28, "f28">, DwarfRegNum<[60, 60]>; -def F29 : FPR<29, "f29">, DwarfRegNum<[61, 61]>; -def F30 : FPR<30, "f30">, DwarfRegNum<[62, 62]>; -def F31 : FPR<31, "f31">, DwarfRegNum<[63, 63]>; +foreach Index = 0-31 in { + def F#Index : FPR<Index, "f"#Index>, + DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>; +} // Vector registers -def V0 : VR< 0, "v0">, DwarfRegNum<[77, 77]>; -def V1 : VR< 1, "v1">, DwarfRegNum<[78, 78]>; -def V2 : VR< 2, "v2">, DwarfRegNum<[79, 79]>; -def V3 : VR< 3, "v3">, DwarfRegNum<[80, 80]>; -def V4 : VR< 4, "v4">, DwarfRegNum<[81, 81]>; -def V5 : VR< 5, "v5">, DwarfRegNum<[82, 82]>; -def V6 : VR< 6,
"v6">, DwarfRegNum<[83, 83]>; -def V7 : VR< 7, "v7">, DwarfRegNum<[84, 84]>; -def V8 : VR< 8, "v8">, DwarfRegNum<[85, 85]>; -def V9 : VR< 9, "v9">, DwarfRegNum<[86, 86]>; -def V10 : VR<10, "v10">, DwarfRegNum<[87, 87]>; -def V11 : VR<11, "v11">, DwarfRegNum<[88, 88]>; -def V12 : VR<12, "v12">, DwarfRegNum<[89, 89]>; -def V13 : VR<13, "v13">, DwarfRegNum<[90, 90]>; -def V14 : VR<14, "v14">, DwarfRegNum<[91, 91]>; -def V15 : VR<15, "v15">, DwarfRegNum<[92, 92]>; -def V16 : VR<16, "v16">, DwarfRegNum<[93, 93]>; -def V17 : VR<17, "v17">, DwarfRegNum<[94, 94]>; -def V18 : VR<18, "v18">, DwarfRegNum<[95, 95]>; -def V19 : VR<19, "v19">, DwarfRegNum<[96, 96]>; -def V20 : VR<20, "v20">, DwarfRegNum<[97, 97]>; -def V21 : VR<21, "v21">, DwarfRegNum<[98, 98]>; -def V22 : VR<22, "v22">, DwarfRegNum<[99, 99]>; -def V23 : VR<23, "v23">, DwarfRegNum<[100, 100]>; -def V24 : VR<24, "v24">, DwarfRegNum<[101, 101]>; -def V25 : VR<25, "v25">, DwarfRegNum<[102, 102]>; -def V26 : VR<26, "v26">, DwarfRegNum<[103, 103]>; -def V27 : VR<27, "v27">, DwarfRegNum<[104, 104]>; -def V28 : VR<28, "v28">, DwarfRegNum<[105, 105]>; -def V29 : VR<29, "v29">, DwarfRegNum<[106, 106]>; -def V30 : VR<30, "v30">, DwarfRegNum<[107, 107]>; -def V31 : VR<31, "v31">, DwarfRegNum<[108, 108]>; +foreach Index = 0-31 in { + def V#Index : VR<Index, "v"#Index>, + DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; +} + +// The representation of r0 when treated as the constant 0. +def ZERO : GPR<0, "0">; +def ZERO8 : GP8<ZERO, "0">; + +// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">; +def FP8 : GP8<FP, "**FRAME POINTER**">; // Condition register bits def CR0LT : CRBIT< 0, "0">; @@ -278,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">; // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), (sequence "R%u", 30, 13), - R31, R0, R1, LR)>; + R31, R0, R1, FP)>; def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), (sequence "X%u", 30, 14), - X31, X13, X0, X1, LR8)>; + X31, X13, X0, X1, FP8)>; + +// For some instructions r0 is special (representing the value 0 instead of +// the value in the r0 register), and we use these register subclasses to +// prevent r0 from being allocated for use by those instructions. +def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>; +def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>; // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 // ABI the size of the Floating-point register save area is determined by the diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td index ba63b5c..ae084aa 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td @@ -749,3 +749,18 @@ def PPCA2Itineraries : ProcessorItineraries< [15, 7], [FPR_Bypass, FPR_Bypass]> ]>; + +// ===---------------------------------------------------------------------===// +// A2 machine model for scheduling and other instruction cost heuristics. + +def PPCA2Model : SchedMachineModel { + let IssueWidth = 1; // 2 micro-ops are dispatched per cycle. + let MinLatency = -1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 6; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead.
+ let MispredictPenalty = 6; + + let Itineraries = PPCA2Itineraries; +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td index 7c02ea0..c64998d 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td @@ -92,3 +92,18 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<VecVSL , [InstrStage<2, [VIU1]>]>, InstrItinData<VecVSR , [InstrStage<3, [VPU]>]> ]>; + +// ===---------------------------------------------------------------------===// +// G5 machine model for scheduling and other instruction cost heuristics. + +def G5Model : SchedMachineModel { + let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle. + let MinLatency = 0; // Out-of-order dispatch. + let LoadLatency = 3; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead. + let MispredictPenalty = 16; + + let Itineraries = G5Itineraries; +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 9c8cb92..a8f2b3f 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "PPCSubtarget.h" -#include "PPCRegisterInfo.h" #include "PPC.h" -#include "llvm/GlobalValue.h" -#include "llvm/Target/TargetMachine.h" +#include "PPCRegisterInfo.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" #include <cstdlib> #define GET_SUBTARGETINFO_TARGET_DESC @@ -36,9 +36,20 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , Use64BitRegs(false) , IsPPC64(is64Bit) , HasAltivec(false) + , HasQPX(false) , HasFSQRT(false) + , HasFRE(false) + , HasFRES(false) + ,
HasFRSQRTE(false) + , HasFRSQRTES(false) + , HasRecipPrec(false) , HasSTFIWX(false) + , HasLFIWAX(false) + , HasFPRND(false) + , HasFPCVT(false) , HasISEL(false) + , HasPOPCNTD(false) + , HasLDBRX(false) , IsBookE(false) , HasLazyResolverStubs(false) , IsJITCodeModel(false) @@ -82,6 +93,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, // Set up darwin-specific properties. if (isDarwin()) HasLazyResolverStubs = true; + + // QPX requires a 32-byte aligned stack. Note that we need to do this if + // we're compiling for a BG/Q system regardless of whether or not QPX + // is enabled because external functions will assume this alignment. + if (hasQPX() || isBGQ()) + StackAlignment = 32; } /// SetJITMode - This is called to inform the subtarget info that we are diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h index b9e22f4..65b4d21 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -14,9 +14,9 @@ #ifndef POWERPCSUBTARGET_H #define POWERPCSUBTARGET_H -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <string> #define GET_SUBTARGETINFO_HEADER @@ -43,7 +43,12 @@ namespace PPC { DIR_A2, DIR_E500mc, DIR_E5500, + DIR_PWR3, + DIR_PWR4, + DIR_PWR5, + DIR_PWR5X, DIR_PWR6, + DIR_PWR6X, DIR_PWR7, DIR_64 }; @@ -70,9 +75,17 @@ protected: bool Use64BitRegs; bool IsPPC64; bool HasAltivec; + bool HasQPX; bool HasFSQRT; + bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES; + bool HasRecipPrec; bool HasSTFIWX; + bool HasLFIWAX; + bool HasFPRND; + bool HasFPCVT; bool HasISEL; + bool HasPOPCNTD; + bool HasLDBRX; bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; @@ -148,10 +161,21 @@ public: // Specific obvious features. 
bool hasFSQRT() const { return HasFSQRT; } + bool hasFRE() const { return HasFRE; } + bool hasFRES() const { return HasFRES; } + bool hasFRSQRTE() const { return HasFRSQRTE; } + bool hasFRSQRTES() const { return HasFRSQRTES; } + bool hasRecipPrec() const { return HasRecipPrec; } bool hasSTFIWX() const { return HasSTFIWX; } + bool hasLFIWAX() const { return HasLFIWAX; } + bool hasFPRND() const { return HasFPRND; } + bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } + bool hasQPX() const { return HasQPX; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } + bool hasPOPCNTD() const { return HasPOPCNTD; } + bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } const Triple &getTargetTriple() const { return TargetTriple; } @@ -160,6 +184,8 @@ public: bool isDarwin() const { return TargetTriple.isMacOSX(); } /// isBGP - True if this is a BG/P platform. bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; } + /// isBGQ - True if this is a BG/Q platform. 
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; } bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 3fc977e..fe851c1 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -13,13 +13,13 @@ #include "PPCTargetMachine.h" #include "PPC.h" -#include "llvm/PassManager.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; static cl:: @@ -43,8 +43,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, DL(Subtarget.getDataLayoutString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), - STTI(&TLInfo), VTTI(&TLInfo) { + InstrItins(Subtarget.getInstrItineraryData()) { // The binutils for the BG/P are too old for CFI. if (Subtarget.isBGP()) @@ -127,3 +126,12 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } + +void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our PPC pass. This + // allows the PPC pass to delegate to the target independent layer when + // appropriate. 
+ PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createPPCTargetTransformInfoPass(this)); +} + diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h index c168433..606ccb3 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -15,14 +15,13 @@ #define PPC_TARGETMACHINE_H #include "PPCFrameLowering.h" -#include "PPCSubtarget.h" -#include "PPCJITInfo.h" -#include "PPCInstrInfo.h" #include "PPCISelLowering.h" +#include "PPCInstrInfo.h" +#include "PPCJITInfo.h" #include "PPCSelectionDAGInfo.h" +#include "PPCSubtarget.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" -#include "llvm/DataLayout.h" namespace llvm { @@ -37,8 +36,6 @@ class PPCTargetMachine : public LLVMTargetMachine { PPCTargetLowering TLInfo; PPCSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: PPCTargetMachine(const Target &T, StringRef TT, @@ -66,17 +63,14 @@ public: virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); + + /// \brief Register PPC analysis passes with a pass manager. + virtual void addAnalysisPasses(PassManagerBase &PM); }; /// PPC32TargetMachine - PowerPC 32-bit target machine. 
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp new file mode 100644 index 0000000..2504ba7 --- /dev/null +++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -0,0 +1,240 @@ +//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// PPC target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ppctti" +#include "PPC.h" +#include "PPCTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/CostTable.h" +using namespace llvm; + +// Declare the pass initialization routine locally as target-specific passes +// don't have a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. +namespace llvm { +void initializePPCTTIPass(PassRegistry &); +} + +namespace { + +class PPCTTI : public ImmutablePass, public TargetTransformInfo { + const PPCTargetMachine *TM; + const PPCSubtarget *ST; + const PPCTargetLowering *TLI; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + PPCTTI(const PPCTargetMachine *TM) + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + TLI(TM->getTargetLowering()) { + initializePPCTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getRegisterBitWidth(bool Vector) const; + virtual unsigned getMaximumUnrollFactor() const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind, + OperandValueKind) const; + virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const; + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; + + /// @} +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti", + "PPC Target Transform Info", true, 
true, false) +char PPCTTI::ID = 0; + +ImmutablePass * +llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) { + return new PPCTTI(TM); +} + + +//===----------------------------------------------------------------------===// +// +// PPC cost model. +// +//===----------------------------------------------------------------------===// + +PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + if (ST->hasPOPCNTD() && TyWidth <= 64) + return PSK_FastHardware; + return PSK_Software; +} + +unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { + if (Vector && !ST->hasAltivec()) + return 0; + return 32; +} + +unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasAltivec()) return 128; + return 0; + } + + if (ST->isPPC64()) + return 64; + return 32; + +} + +unsigned PPCTTI::getMaximumUnrollFactor() const { + unsigned Directive = ST->getDarwinDirective(); + // The 440 has no SIMD support, but floating-point instructions + // have a 5-cycle latency, so unroll by 5x for latency hiding. + if (Directive == PPC::DIR_440) + return 5; + + // The A2 has no SIMD support, but floating-point instructions + // have a 6-cycle latency, so unroll by 6x for latency hiding. + if (Directive == PPC::DIR_A2) + return 6; + + // FIXME: For lack of any better information, do no harm... + if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) + return 1; + + // For most things, modern systems have two execution units (and + // out-of-order execution). + return 2; +} + +unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Op1Info, + OperandValueKind Op2Info) const { + assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); + + // Fallback to the default implementation. 
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, + Op2Info); +} + +unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) const { + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); +} + +unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} + +unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + assert(Val->isVectorTy() && "This must be a vector type"); + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Estimated cost of a load-hit-store delay. This was obtained + // experimentally as a minimum needed to prevent unprofitable + // vectorization for the paq8p benchmark. It may need to be + // raised further if other unprofitable cases remain. + unsigned LHSPenalty = 12; + + // Vector element insert/extract with Altivec is very expensive, + // because they require store and reload with the attendant + // processor stall for load-hit-store. Until VSX is available, + // these need to be estimated as very costly. + if (ISD == ISD::EXTRACT_VECTOR_ELT || + ISD == ISD::INSERT_VECTOR_ELT) + return LHSPenalty + + TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); + + return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); +} + +unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const { + // Legalize the type. + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && + "Invalid Opcode"); + + // Each load/store unit costs 1. 
+ unsigned Cost = LT.first * 1; + + // PPC in general does not support unaligned loads and stores. They'll need + // to be decomposed based on the alignment factor. + unsigned SrcBytes = LT.second.getStoreSize(); + if (SrcBytes && Alignment && Alignment < SrcBytes) + Cost *= (SrcBytes/Alignment); + + return Cost; +} + diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp index 5dc8568..fa44331 100644 --- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "PPC.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; |