author:    ed <ed@FreeBSD.org>    2009-07-04 13:58:26 +0000
committer: ed <ed@FreeBSD.org>    2009-07-04 13:58:26 +0000
commit:    72621d11de5b873f1695f391eb95f0b336c3d2d4 (patch)
tree:      84360c8989c912127a383af37c4b1aa5767bd16e /lib/Target/PowerPC
parent:    cf5cd875b51255602afaed29deb636b66b295671 (diff)
Import LLVM 74788.
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--  lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp |   36
-rw-r--r--  lib/Target/PowerPC/CMakeLists.txt                |    2
-rw-r--r--  lib/Target/PowerPC/PPC.h                         |    5
-rw-r--r--  lib/Target/PowerPC/PPCCallingConv.td             |   89
-rw-r--r--  lib/Target/PowerPC/PPCCodeEmitter.cpp            |   10
-rw-r--r--  lib/Target/PowerPC/PPCFrameInfo.h                |  128
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp      |    2
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp           | 1199
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h             |   34
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td              |   40
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp              |   84
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td               |   70
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.cpp                |    2
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp           |  274
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h             |    2
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td            |   61
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h                |    4
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp          |   10
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h            |    1
-rw-r--r--  lib/Target/PowerPC/README.txt                    |   10
20 files changed, 1474 insertions, 589 deletions
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index c5aa6ae..7f1673c 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -56,9 +56,8 @@ namespace { const PPCSubtarget &Subtarget; public: explicit PPCAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V), + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), Subtarget(TM.getSubtarget<PPCSubtarget>()) {} virtual const char *getPassName() const { @@ -189,8 +188,7 @@ namespace { if (TM.getRelocationModel() != Reloc::Static) { if (MO.getType() == MachineOperand::MO_GlobalAddress) { GlobalValue *GV = MO.getGlobal(); - if (((GV->isDeclaration() || GV->hasWeakLinkage() || - GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { + if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. std::string Name = Mang->getValueName(GV); FnStubs.insert(Name); @@ -296,9 +294,8 @@ namespace { class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter { public: explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : PPCAsmPrinter(O, TM, T, OL, V){} + const TargetAsmInfo *T, bool V) + : PPCAsmPrinter(O, TM, T, V){} virtual const char *getPassName() const { return "Linux PPC Assembly Printer"; @@ -323,9 +320,8 @@ namespace { raw_ostream &OS; public: explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : PPCAsmPrinter(O, TM, T, OL, V), OS(O) {} + const TargetAsmInfo *T, bool V) + : PPCAsmPrinter(O, TM, T, V), OS(O) {} virtual const char *getPassName() const { return "Darwin PPC Assembly Printer"; @@ -387,11 +383,12 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { if (TM.getRelocationModel() != Reloc::Static) { if (GV->isDeclaration() || GV->isWeakForLinker()) { if (GV->hasHiddenVisibility()) { - if (!GV->isDeclaration() && !GV->hasCommonLinkage()) - O << Name; - else { + if (GV->isDeclaration() || GV->hasCommonLinkage() || + GV->hasAvailableExternallyLinkage()) { HiddenGVStubs.insert(Name); printSuffixedName(Name, "$non_lazy_ptr"); + } else { + O << Name; } } else { GVStubs.insert(Name); @@ -596,7 +593,7 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { printVisibility(CurrentFnName, F->getVisibility()); - EmitAlignment(2, F); + EmitAlignment(MF.getAlignment(), F); O << CurrentFnName << ":\n"; // Emit pre-function debug information. @@ -773,7 +770,7 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { printVisibility(CurrentFnName, F->getVisibility()); - EmitAlignment(F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4, F); + EmitAlignment(MF.getAlignment(), F); O << CurrentFnName << ":\n"; // Emit pre-function debug information. 
@@ -1119,16 +1116,13 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { /// FunctionPass *llvm::createPPCAsmPrinterPass(raw_ostream &o, PPCTargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>(); if (Subtarget->isDarwin()) { - return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); + return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } else { - return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); + return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } } diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 0b67aff..a6479d8 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -26,3 +26,5 @@ add_llvm_target(PowerPCCodeGen PPCTargetAsmInfo.cpp PPCTargetMachine.cpp ) + +target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG) diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index c844e21..f6c3469 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -28,9 +28,8 @@ namespace llvm { FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); -FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS, - PPCTargetMachine &TM, - CodeGenOpt::Level OptLevel, bool Verbose); +FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS, PPCTargetMachine &TM, + bool Verbose); FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM, MachineCodeEmitter &MCE); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 9f916f3..c7ce171 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -43,10 +43,8 @@ def CC_PPC : CallingConv<[ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, // Common sub-targets passes FP values in F1 - F13 - CCIfType<[f32, f64], CCIfSubtarget<"isMachoABI()", - CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>>, - // ELF32 sub-target pass FP values in F1 - F8. - CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + CCIfType<[f32, f64], + CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>, // The first 12 Vector arguments are passed in altivec registers. CCIfType<[v16i8, v8i16, v4i32, v4f32], @@ -64,3 +62,86 @@ def CC_PPC : CallingConv<[ */ +//===----------------------------------------------------------------------===// +// PowerPC System V Release 4 ABI +//===----------------------------------------------------------------------===// + +// _Complex arguments are never split, thus their two scalars are either +// passed both in argument registers or both on the stack. Also _Complex +// arguments are always passed in general purpose registers, never in +// Floating-point registers or vector registers. Arguments which should go +// on the stack are marked with the inreg parameter attribute. +// Giving inreg this target-dependent (and counter-intuitive) meaning +// simplifies things, because functions calls are not always coming from the +// frontend but are also created implicitly e.g. for libcalls. If inreg would +// actually mean that the argument is passed in a register, then all places +// which create function calls/function definitions implicitly would need to +// be aware of this fact and would need to mark arguments accordingly. 
With +// inreg meaning that the argument is passed on the stack, this is not an +// issue, except for calls which involve _Complex types. + +def CC_PPC_SVR4_Common : CallingConv<[ + // The ABI requires i64 to be passed in two adjacent registers with the first + // register having an odd register number. + CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, + + // The first 8 integer arguments are passed in integer registers. + CCIfType<[i32], CCIf<"!ArgFlags.isInReg()", + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + + // Make sure the i64 words from a long double are either both passed in + // registers or both passed on the stack. + CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>, + + // FP values are passed in F1 - F8. + CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + + // Split arguments have an alignment of 8 bytes on the stack. + CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>, + + CCIfType<[i32], CCAssignToStack<4, 4>>, + + // Floats are stored in double precision format, thus they have the same + // alignment and size as doubles. + CCIfType<[f32,f64], CCAssignToStack<8, 8>>, + + // Vectors get 16-byte stack slots that are 16-byte aligned. + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>> +]>; + +// This calling convention puts vector arguments always on the stack. It is used +// to assign vector arguments which belong to the variable portion of the +// parameter list of a variable argument function. +def CC_PPC_SVR4_VarArg : CallingConv<[ + CCDelegateTo<CC_PPC_SVR4_Common> +]>; + +// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put +// vector arguments in vector registers before putting them on the stack. +def CC_PPC_SVR4 : CallingConv<[ + // The first 12 Vector arguments are passed in AltiVec registers. + CCIfType<[v16i8, v8i16, v4i32, v4f32], + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, + + CCDelegateTo<CC_PPC_SVR4_Common> +]>; + +// Helper "calling convention" to handle aggregate by value arguments. +// Aggregate by value arguments are always placed in the local variable space +// of the caller. This calling convention is only used to assign those stack +// offsets in the callers stack frame. +// +// Still, the address of the aggregate copy in the callers stack frame is passed +// in a GPR (or in the parameter list area if all GPRs are allocated) from the +// caller to the callee. The location for the address argument is assigned by +// the CC_PPC_SVR4 calling convention. +// +// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are +// not passed by value. 
+ +def CC_PPC_SVR4_ByVal : CallingConv<[ + CCIfByVal<CCPassByVal<4, 4>>, + + CCCustom<"CC_PPC_SVR4_Custom_Dummy"> +]>; + diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index aa3dce1..cd6018d 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -163,8 +163,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI() || MO.isJTI()) { unsigned Reloc = 0; - if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho || - MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF || + if (MI.getOpcode() == PPC::BL_Darwin || MI.getOpcode() == PPC::BL8_Darwin || + MI.getOpcode() == PPC::BL_SVR4 || MI.getOpcode() == PPC::BL8_ELF || MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8) Reloc = PPC::reloc_pcrel_bx; else { @@ -246,9 +246,9 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, } else if (MO.isMBB()) { unsigned Reloc = 0; unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::B || Opcode == PPC::BL_Macho || - Opcode == PPC::BLA_Macho || Opcode == PPC::BL_ELF || - Opcode == PPC::BLA_ELF) + if (Opcode == PPC::B || Opcode == PPC::BL_Darwin || + Opcode == PPC::BLA_Darwin|| Opcode == PPC::BL_SVR4 || + Opcode == PPC::BLA_SVR4) Reloc = PPC::reloc_pcrel_bx; else // BCC instruction Reloc = PPC::reloc_pcrel_bcx; diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h index 1b5893d..770a560 100644 --- a/lib/Target/PowerPC/PPCFrameInfo.h +++ b/lib/Target/PowerPC/PPCFrameInfo.h @@ -14,8 +14,10 @@ #define POWERPC_FRAMEINFO_H #include "PPC.h" +#include "PPCSubtarget.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/STLExtras.h" namespace llvm { @@ -29,63 +31,153 @@ public: /// getReturnSaveOffset - Return the previous frame offset to save the /// return address. - static unsigned getReturnSaveOffset(bool LP64, bool isMacho) { - if (isMacho) + static unsigned getReturnSaveOffset(bool LP64, bool isDarwinABI) { + if (isDarwinABI) return LP64 ? 16 : 8; - // For ELF 32 ABI: + // SVR4 ABI: return 4; } /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. - static unsigned getFramePointerSaveOffset(bool LP64, bool isMacho) { - // For MachO ABI: + static unsigned getFramePointerSaveOffset(bool LP64, bool isDarwinABI) { + // For the Darwin ABI: // Use the TOC save slot in the PowerPC linkage area for saving the frame // pointer (if needed.) LLVM does not generate code that uses the TOC (R2 // is treated as a caller saved register.) - if (isMacho) + if (isDarwinABI) return LP64 ? 40 : 20; - // For ELF 32 ABI: + // SVR4 ABI: // Save it right before the link register return -4U; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// - static unsigned getLinkageSize(bool LP64, bool isMacho) { - if (isMacho) + static unsigned getLinkageSize(bool LP64, bool isDarwinABI) { + if (isDarwinABI) return 6 * (LP64 ? 8 : 4); - // For ELF 32 ABI: + // SVR4 ABI: return 8; } /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI /// argument area. 
- static unsigned getMinCallArgumentsSize(bool LP64, bool isMacho) { - // For Macho ABI: + static unsigned getMinCallArgumentsSize(bool LP64, bool isDarwinABI) { + // For the Darwin ABI: // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. - if (isMacho) + if (isDarwinABI) return 8 * (LP64 ? 8 : 4); - // For ELF 32 ABI: + // SVR4 ABI: // There is no default stack allocated for the 8 first GPR arguments. return 0; } /// getMinCallFrameSize - Return the minimum size a call frame can be using /// the PowerPC ABI. - static unsigned getMinCallFrameSize(bool LP64, bool isMacho) { + static unsigned getMinCallFrameSize(bool LP64, bool isDarwinABI) { // The call frame needs to be at least big enough for linkage and 8 args. - return getLinkageSize(LP64, isMacho) + - getMinCallArgumentsSize(LP64, isMacho); + return getLinkageSize(LP64, isDarwinABI) + + getMinCallArgumentsSize(LP64, isDarwinABI); + } + + // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. + const std::pair<unsigned, int> * + getCalleeSavedSpillSlots(unsigned &NumEntries) const { + // Early exit if not using the SVR4 ABI. + if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { + NumEntries = 0; + return 0; + } + + static const std::pair<unsigned, int> Offsets[] = { + // Floating-point register save area offsets. + std::pair<unsigned, int>(PPC::F31, -8), + std::pair<unsigned, int>(PPC::F30, -16), + std::pair<unsigned, int>(PPC::F29, -24), + std::pair<unsigned, int>(PPC::F28, -32), + std::pair<unsigned, int>(PPC::F27, -40), + std::pair<unsigned, int>(PPC::F26, -48), + std::pair<unsigned, int>(PPC::F25, -56), + std::pair<unsigned, int>(PPC::F24, -64), + std::pair<unsigned, int>(PPC::F23, -72), + std::pair<unsigned, int>(PPC::F22, -80), + std::pair<unsigned, int>(PPC::F21, -88), + std::pair<unsigned, int>(PPC::F20, -96), + std::pair<unsigned, int>(PPC::F19, -104), + std::pair<unsigned, int>(PPC::F18, -112), + std::pair<unsigned, int>(PPC::F17, -120), + std::pair<unsigned, int>(PPC::F16, -128), + std::pair<unsigned, int>(PPC::F15, -136), + std::pair<unsigned, int>(PPC::F14, -144), + + // General register save area offsets. + std::pair<unsigned, int>(PPC::R31, -4), + std::pair<unsigned, int>(PPC::R30, -8), + std::pair<unsigned, int>(PPC::R29, -12), + std::pair<unsigned, int>(PPC::R28, -16), + std::pair<unsigned, int>(PPC::R27, -20), + std::pair<unsigned, int>(PPC::R26, -24), + std::pair<unsigned, int>(PPC::R25, -28), + std::pair<unsigned, int>(PPC::R24, -32), + std::pair<unsigned, int>(PPC::R23, -36), + std::pair<unsigned, int>(PPC::R22, -40), + std::pair<unsigned, int>(PPC::R21, -44), + std::pair<unsigned, int>(PPC::R20, -48), + std::pair<unsigned, int>(PPC::R19, -52), + std::pair<unsigned, int>(PPC::R18, -56), + std::pair<unsigned, int>(PPC::R17, -60), + std::pair<unsigned, int>(PPC::R16, -64), + std::pair<unsigned, int>(PPC::R15, -68), + std::pair<unsigned, int>(PPC::R14, -72), + + // CR save area offset. + // FIXME SVR4: Disable CR save area for now. 
+// std::pair<unsigned, int>(PPC::CR2, -4), +// std::pair<unsigned, int>(PPC::CR3, -4), +// std::pair<unsigned, int>(PPC::CR4, -4), +// std::pair<unsigned, int>(PPC::CR2LT, -4), +// std::pair<unsigned, int>(PPC::CR2GT, -4), +// std::pair<unsigned, int>(PPC::CR2EQ, -4), +// std::pair<unsigned, int>(PPC::CR2UN, -4), +// std::pair<unsigned, int>(PPC::CR3LT, -4), +// std::pair<unsigned, int>(PPC::CR3GT, -4), +// std::pair<unsigned, int>(PPC::CR3EQ, -4), +// std::pair<unsigned, int>(PPC::CR3UN, -4), +// std::pair<unsigned, int>(PPC::CR4LT, -4), +// std::pair<unsigned, int>(PPC::CR4GT, -4), +// std::pair<unsigned, int>(PPC::CR4EQ, -4), +// std::pair<unsigned, int>(PPC::CR4UN, -4), + + // VRSAVE save area offset. + std::pair<unsigned, int>(PPC::VRSAVE, -4), + + // Vector register save area + std::pair<unsigned, int>(PPC::V31, -16), + std::pair<unsigned, int>(PPC::V30, -32), + std::pair<unsigned, int>(PPC::V29, -48), + std::pair<unsigned, int>(PPC::V28, -64), + std::pair<unsigned, int>(PPC::V27, -80), + std::pair<unsigned, int>(PPC::V26, -96), + std::pair<unsigned, int>(PPC::V25, -112), + std::pair<unsigned, int>(PPC::V24, -128), + std::pair<unsigned, int>(PPC::V23, -144), + std::pair<unsigned, int>(PPC::V22, -160), + std::pair<unsigned, int>(PPC::V21, -176), + std::pair<unsigned, int>(PPC::V20, -192) + }; + + NumEntries = array_lengthof(Offsets); + + return Offsets; } - }; } // End llvm namespace diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index e7658fc..ec3e757 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -159,7 +159,7 @@ getHazardType(SUnit *SU) { } // Do not allow MTCTR and BCTRL to be in the same dispatch group. - if (HasCTRSet && (Opcode == PPC::BCTRL_Macho || Opcode == PPC::BCTRL_ELF)) + if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4)) return NoopHazard; // If this is a load following a store, make sure it's not to the same or diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 87f8fb0b4..1c6b287 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -35,6 +35,21 @@ #include "llvm/DerivedTypes.h" using namespace llvm; +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc", cl::desc("enable preincrement load/store generation on PPC (experimental)"), cl::Hidden); @@ -190,8 +205,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - // VAARG is custom lowered with ELF 32 ABI - if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI()) + // VAARG is custom lowered with the SVR4 ABI + if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) setOperationAction(ISD::VAARG, MVT::Other, Custom); else setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -380,7 +395,7 @@ unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const { // 
Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) return 4; - // FIXME Elf TBD + // FIXME SVR4 TBD return 4; } @@ -404,11 +419,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; case PPCISD::STD_32: return "PPCISD::STD_32"; - case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF"; - case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho"; + case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; + case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; - case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho"; - case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF"; + case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; + case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::VCMP: return "PPCISD::VCMP"; @@ -428,11 +443,17 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { } } - MVT PPCTargetLowering::getSetCCResultType(MVT VT) const { return MVT::i32; } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned PPCTargetLowering::getFunctionAlignment(const Function *F) const { + if (getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) + return F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4; + else + return 2; +} //===----------------------------------------------------------------------===// // Node matching predicates, for use by the tblgen matching code. @@ -1228,7 +1249,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget) { - assert(0 && "VAARG in ELF32 ABI not implemented yet!"); + assert(0 && "VAARG not yet implemented for the SVR4 ABI!"); return SDValue(); // Not reached } @@ -1261,7 +1282,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false, - false, false, CallingConv::C, false, + false, false, 0, CallingConv::C, false, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); @@ -1279,7 +1300,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { DebugLoc dl = Op.getDebugLoc(); - if (Subtarget.isMachoABI()) { + if (Subtarget.isDarwinABI()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1288,7 +1309,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); } - // For ELF 32 ABI we follow the layout of the va_list struct. + // For the SVR4 ABI we follow the layout of the va_list struct. // We suppose the given va_list is already allocated. 
// // typedef struct { @@ -1313,8 +1334,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // } va_list[1]; - SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8); - SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8); + SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i32); + SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1334,15 +1355,15 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); // Store first byte : number of int regs - SDValue firstStore = DAG.getStore(Op.getOperand(0), dl, ArgGPR, - Op.getOperand(1), SV, 0); + SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, + Op.getOperand(1), SV, 0, MVT::i8); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = - DAG.getStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset); + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); @@ -1359,10 +1380,71 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return true; +} + +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned ArgRegs[] = { + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10, + }; + const unsigned NumArgRegs = array_lengthof(ArgRegs); + + unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); + + // Skip one register if the first unallocated register has an even register + // number and there are still argument registers available which have not been + // allocated yet. RegNum is actually an index into ArgRegs, which means we + // need to skip a register if RegNum is odd. + if (RegNum != NumArgRegs && RegNum % 2 == 1) { + State.AllocateReg(ArgRegs[RegNum]); + } + + // Always return false here, as this function only makes sure that the first + // unallocated register has an odd register number and does not actually + // allocate a register for the current argument. + return false; +} + +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned ArgRegs[] = { + PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8 + }; + + const unsigned NumArgRegs = array_lengthof(ArgRegs); + + unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); + + // If there is only one Floating-point register left we need to put both f64 + // values of a split ppc_fp128 value on the stack. + if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { + State.AllocateReg(ArgRegs[RegNum]); + } + + // Always return false here, as this function only makes sure that the two f64 + // values a ppc_fp128 value is split into are both passed in registers or both + // passed on the stack and does not actually allocate a register for the + // current argument. 
+ return false; +} + /// GetFPR - Get the set of FP registers that should be allocated for arguments, /// depending on which subtarget is selected. static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { - if (Subtarget.isMachoABI()) { + if (Subtarget.isDarwinABI()) { static const unsigned FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 @@ -1381,9 +1463,9 @@ static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags, - bool isVarArg, unsigned PtrByteSize) { + unsigned PtrByteSize) { MVT ArgVT = Arg.getValueType(); - unsigned ArgSize =ArgVT.getSizeInBits()/8; + unsigned ArgSize = ArgVT.getSizeInBits()/8; if (Flags.isByVal()) ArgSize = Flags.getByValSize(); ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; @@ -1392,18 +1474,248 @@ static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags, } SDValue -PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, - SelectionDAG &DAG, - int &VarArgsFrameIndex, - int &VarArgsStackOffset, - unsigned &VarArgsNumGPR, - unsigned &VarArgsNumFPR, - const PPCSubtarget &Subtarget) { +PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op, + SelectionDAG &DAG, + int &VarArgsFrameIndex, + int &VarArgsStackOffset, + unsigned &VarArgsNumGPR, + unsigned &VarArgsNumFPR, + const PPCSubtarget &Subtarget) { + // SVR4 ABI Stack Frame Layout: + // +-----------------------------------+ + // +--> | Back chain | + // | +-----------------------------------+ + // | | Floating-point register save area | + // | +-----------------------------------+ + // | | General register save area | + // | +-----------------------------------+ + // | | CR save word | + // | +-----------------------------------+ + // | | VRSAVE save word | + // | +-----------------------------------+ + // | | Alignment padding | + // | +-----------------------------------+ + // | | Vector register save area | + // | +-----------------------------------+ + // | | Local variable space | + // | +-----------------------------------+ + // | | Parameter list area | + // | +-----------------------------------+ + // | | LR save word | + // | +-----------------------------------+ + // SP--> +--- | Back chain | + // +-----------------------------------+ + // + // Specifications: + // System V Application Binary Interface PowerPC Processor Supplement + // AltiVec Technology Programming Interface Manual + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + SmallVector<SDValue, 8> ArgValues; + SDValue Root = Op.getOperand(0); + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; + DebugLoc dl = Op.getDebugLoc(); + + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + // Potential tail calls could cause overwriting of argument stack slots. + unsigned CC = MF.getFunction()->getCallingConv(); + bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast)); + unsigned PtrByteSize = 4; + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + + // Reserve space for the linkage area on the stack. 
+ CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize); + + CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored in registers. + if (VA.isRegLoc()) { + TargetRegisterClass *RC; + MVT ValVT = VA.getValVT(); + + switch (ValVT.getSimpleVT()) { + default: + assert(0 && "ValVT not supported by FORMAL_ARGUMENTS Lowering"); + case MVT::i32: + RC = PPC::GPRCRegisterClass; + break; + case MVT::f32: + RC = PPC::F4RCRegisterClass; + break; + case MVT::f64: + RC = PPC::F8RCRegisterClass; + break; + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + RC = PPC::VRRCRegisterClass; + break; + } + + // Transform the arguments stored in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, ValVT); + + ArgValues.push_back(ArgValue); + } else { + // Argument stored in memory. + assert(VA.isMemLoc()); + + unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), + isImmutable); + + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0)); + } + } + + // Assign locations to all of the incoming aggregate by value arguments. + // Aggregates passed by value are stored in the local variable space of the + // caller's stack frame, right above the parameter list area. + SmallVector<CCValAssign, 16> ByValArgLocs; + CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs); + + // Reserve stack space for the allocations in CCInfo. + CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); + + CCByValInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4_ByVal); + + // Area that is at least reserved in the caller of this function. + unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); + + // Set the size that is at least reserved in caller of this function. Tail + // call optimized function's reserved stack space needs to be aligned so that + // taking the difference between two stack areas will result in an aligned + // stack. + PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); + + MinReservedArea = + std::max(MinReservedArea, + PPCFrameInfo::getMinCallFrameSize(false, false)); + + unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> + getStackAlignment(); + unsigned AlignMask = TargetAlign-1; + MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; + + FI->setMinReservedArea(MinReservedArea); + + SmallVector<SDValue, 8> MemOps; + + // If the function takes variable number of arguments, make a frame index for + // the start of the first vararg value... for expansion of llvm.va_start. + if (isVarArg) { + static const unsigned GPArgRegs[] = { + PPC::R3, PPC::R4, PPC::R5, PPC::R6, + PPC::R7, PPC::R8, PPC::R9, PPC::R10, + }; + const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); + + static const unsigned FPArgRegs[] = { + PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, + PPC::F8 + }; + const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); + + VarArgsNumGPR = CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs); + VarArgsNumFPR = CCInfo.getFirstUnallocated(FPArgRegs, NumFPArgRegs); + + // Make room for NumGPArgRegs and NumFPArgRegs. 
+ int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + + NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; + + VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + CCInfo.getNextStackOffset()); + + VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8); + SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + + // The fixed integer arguments of a variadic function are + // stored to the VarArgsFrameIndex on the stack. + unsigned GPRIndex = 0; + for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { + SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); + SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + + // If this function is vararg, store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by deferencing the + // result of va_next. + for (; GPRIndex != NumGPArgRegs; ++GPRIndex) { + unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); + + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by four for the next argument to store + SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + + // FIXME SVR4: We only need to save FP argument registers if CR bit 6 is + // set. + + // The double arguments are stored to the VarArgsFrameIndex + // on the stack. + unsigned FPRIndex = 0; + for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { + SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); + SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by eight for the next argument to store + SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, + PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + + for (; FPRIndex != NumFPArgRegs; ++FPRIndex) { + unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); + + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + // Increment the address by eight for the next argument to store + SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, + PtrVT); + FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); + } + } + + if (!MemOps.empty()) + Root = DAG.getNode(ISD::TokenFactor, dl, + MVT::Other, &MemOps[0], MemOps.size()); + + + ArgValues.push_back(Root); + + // Return the new list of results. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), + &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); +} + +SDValue +PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, + SelectionDAG &DAG, + int &VarArgsFrameIndex, + const PPCSubtarget &Subtarget) { // TODO: add description of PPC stack frame format, or at least some docs. 
// MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); SmallVector<SDValue, 8> ArgValues; SDValue Root = Op.getOperand(0); bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; @@ -1411,14 +1723,12 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; - bool isMachoABI = Subtarget.isMachoABI(); - bool isELF32_ABI = Subtarget.isELF32_ABI(); // Potential tail calls could cause overwriting of argument stack slots. unsigned CC = MF.getFunction()->getCallingConv(); bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; - unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; @@ -1439,7 +1749,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, }; const unsigned Num_GPR_Regs = array_lengthof(GPR_32); - const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8; + const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof( VR); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; @@ -1453,8 +1763,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // that out...for the pathological case, compute VecArgOffset as the // start of the vector parameter area. Computing VecArgOffset is the // entire point of the following loop. - // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying - // to handle Elf here. unsigned VecArgOffset = ArgOffset; if (!isVarArg && !isPPC64) { for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; @@ -1500,10 +1808,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. - // - // In the ELF 32 ABI, GPRs and stack are double word align: an argument - // represented with two words (long long or double) must be copied to an - // even GPR_idx value or to an even ArgOffset value. SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; @@ -1516,8 +1820,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags(); - // See if next argument requires stack alignment in ELF - bool Align = Flags.isSplit(); unsigned CurArgOffset = ArgOffset; @@ -1528,25 +1830,20 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, MinReservedArea = ((MinReservedArea+15)/16)*16; MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), Flags, - isVarArg, PtrByteSize); } else nAltivecParamsAtEnd++; } else // Calculate min reserved area. MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), Flags, - isVarArg, PtrByteSize); - // FIXME alignment for ELF may not be right // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of registers. 
ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; - // Double word align in ELF - if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2); // Objects of size 1 and 2 are right justified, everything else is // left justified. This means the memory address is adjusted forwards. if (ObjSize==1 || ObjSize==2) { @@ -1558,17 +1855,16 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, ArgValues.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); MemOps.push_back(Store); ++GPR_idx; - if (isMachoABI) ArgOffset += PtrByteSize; - } else { - ArgOffset += PtrByteSize; } + + ArgOffset += PtrByteSize; + continue; } for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { @@ -1576,15 +1872,14 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // to memory. ArgVal will be address of the beginning of // the object. if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); ++GPR_idx; - if (isMachoABI) ArgOffset += PtrByteSize; + ArgOffset += PtrByteSize; } else { ArgOffset += ArgSize - (ArgOffset-CurArgOffset); break; @@ -1597,30 +1892,22 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, default: assert(0 && "Unhandled argument type!"); case MVT::i32: if (!isPPC64) { - // Double word align in ELF - if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2); - if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; } - // Stack align in ELF - if (needsLoad && Align && isELF32_ABI) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - // All int arguments reserve stack space in Macho ABI. - if (isMachoABI || needsLoad) ArgOffset += PtrByteSize; + // All int arguments reserve stack space in the Darwin ABI. + ArgOffset += PtrByteSize; break; } // FALLTHROUGH case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32) { @@ -1641,37 +1928,35 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, needsLoad = true; ArgSize = PtrByteSize; } - // All int arguments reserve stack space in Macho ABI. - if (isMachoABI || needsLoad) ArgOffset += 8; + // All int arguments reserve stack space in the Darwin ABI. + ArgOffset += 8; break; case MVT::f32: case MVT::f64: // Every 4 bytes of argument space consumes one of the GPRs available for // argument passing. 
- if (GPR_idx != Num_GPR_Regs && isMachoABI) { + if (GPR_idx != Num_GPR_Regs) { ++GPR_idx; if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) ++GPR_idx; } if (FPR_idx != Num_FPR_Regs) { unsigned VReg; + if (ObjectVT == MVT::f32) - VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else - VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); - RegInfo.addLiveIn(FPR[FPR_idx], VReg); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); + ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); ++FPR_idx; } else { needsLoad = true; } - // Stack align in ELF - if (needsLoad && Align && isELF32_ABI) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - // All FP arguments reserve stack space in Macho ABI. - if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize; + // All FP arguments reserve stack space in the Darwin ABI. + ArgOffset += isPPC64 ? 8 : ObjSize; break; case MVT::v4f32: case MVT::v4i32: @@ -1680,8 +1965,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { - unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass); - RegInfo.addLiveIn(VR[VR_idx], VReg); + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { @@ -1734,7 +2018,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, } MinReservedArea = std::max(MinReservedArea, - PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI)); + PPCFrameInfo::getMinCallFrameSize(isPPC64, true)); unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> getStackAlignment(); unsigned AlignMask = TargetAlign-1; @@ -1744,53 +2028,23 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - - int depth; - if (isELF32_ABI) { - VarArgsNumGPR = GPR_idx; - VarArgsNumFPR = FPR_idx; - - // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame - // pointer. - depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 + - Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 + - PtrVT.getSizeInBits()/8); - - VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - ArgOffset); - - } - else - depth = ArgOffset; + int Depth = ArgOffset; VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - depth); + Depth); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - // In ELF 32 ABI, the fixed integer arguments of a variadic function are - // stored to the VarArgsFrameIndex on the stack. - if (isELF32_ABI) { - for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) { - SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT); - SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); - MemOps.push_back(Store); - // Increment the address by four for the next argument to store - SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); - FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); - } - } - // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by deferencing the // result of va_next. 
for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { unsigned VReg; + if (isPPC64) - VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else - VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - RegInfo.addLiveIn(GPR[GPR_idx], VReg); SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); @@ -1798,34 +2052,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } - - // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex - // on the stack. - if (isELF32_ABI) { - for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) { - SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64); - SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); - MemOps.push_back(Store); - // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, - PtrVT); - FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); - } - - for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) { - unsigned VReg; - VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); - - RegInfo.addLiveIn(FPR[FPR_idx], VReg); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); - MemOps.push_back(Store); - // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, - PtrVT); - FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); - } - } } if (!MemOps.empty()) @@ -1840,11 +2066,10 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, } /// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus -/// linkage area. +/// linkage area for the Darwin ABI. static unsigned CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, bool isPPC64, - bool isMachoABI, bool isVarArg, unsigned CC, CallSDNode *TheCall, @@ -1852,7 +2077,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. - unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true); unsigned NumOps = TheCall->getNumArgs(); unsigned PtrByteSize = isPPC64 ? 8 : 4; @@ -1879,7 +2104,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. NumBytes = ((NumBytes+15)/16)*16; } - NumBytes += CalculateStackSlotSize(Arg, Flags, isVarArg, PtrByteSize); + NumBytes += CalculateStackSlotSize(Arg, Flags, PtrByteSize); } // Allow for Altivec parameters at the end, if needed. @@ -1894,7 +2119,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. NumBytes = std::max(NumBytes, - PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI)); + PPCFrameInfo::getMinCallFrameSize(isPPC64, true)); // Tail call needs the stack to be aligned. 
if (CC==CallingConv::Fast && PerformTailCallOpt) { @@ -2017,26 +2242,30 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue OldFP, int SPDiff, bool isPPC64, - bool isMachoABI, + bool isDarwinABI, DebugLoc dl) { if (SPDiff) { // Calculate the new stack slot for the return address. int SlotSize = isPPC64 ? 8 : 4; int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, - isMachoABI); + isDarwinABI); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewRetAddrLoc); - int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, - isMachoABI); - int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc); - MVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, PseudoSourceValue::getFixedStack(NewRetAddr), 0); - SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); - Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - PseudoSourceValue::getFixedStack(NewFPIdx), 0); + + // When using the SVR4 ABI there is no need to move the FP stack slot + // as the FP is never overwritten. + if (isDarwinABI) { + int NewFPLoc = + SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); + int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc); + SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); + Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, + PseudoSourceValue::getFixedStack(NewFPIdx), 0); + } } return Chain; } @@ -2067,6 +2296,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, + bool isDarwinABI, DebugLoc dl) { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. @@ -2074,9 +2304,14 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); Chain = SDValue(LROpOut.getNode(), 1); - FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); - Chain = SDValue(FPOpOut.getNode(), 1); + + // When using the SVR4 ABI there is no need to load the FP stack slot + // as the FP is never overwritten. 
+ if (isDarwinABI) { + FPOpOut = getFramePointerFrameIndex(DAG); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); + Chain = SDValue(FPOpOut.getNode(), 1); + } } return Chain; } @@ -2090,8 +2325,8 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - unsigned Size, DebugLoc dl) { - SDValue SizeNode = DAG.getConstant(Size, MVT::i32); + DebugLoc dl) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), false, NULL, 0, NULL, 0); } @@ -2122,21 +2357,387 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, TailCallArguments); } -SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, - TargetMachine &TM) { +static +void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, + DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, + SDValue LROp, SDValue FPOp, bool isDarwinABI, + SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) { + MachineFunction &MF = DAG.getMachineFunction(); + + // Emit a sequence of copyto/copyfrom virtual registers for arguments that + // might overwrite each other in case of tail call optimization. + SmallVector<SDValue, 8> MemOpChains2; + // Do not flag preceeding copytoreg stuff together with the following stuff. + InFlag = SDValue(); + StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, + MemOpChains2, dl); + if (!MemOpChains2.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains2[0], MemOpChains2.size()); + + // Store the return address to the appropriate stack slot. + Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, + isPPC64, isDarwinABI, dl); + + // Emit callseq_end just before tailcall node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); +} + +static +unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, + SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, + SmallVector<SDValue, 8> &Ops, std::vector<MVT> &NodeTys, + bool isSVR4ABI) { + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + NodeTys.push_back(MVT::Other); // Returns a chain + NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + + unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType()); + else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); + else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) + // If this is an absolute destination address, use the munged value. + Callee = SDValue(Dest, 0); + else { + // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair + // to do the call, we can't use PPCISD::CALL. 
+ SDValue MTCTROps[] = {Chain, Callee, InFlag}; + Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, + 2 + (InFlag.getNode() != 0)); + InFlag = Chain.getValue(1); + + NodeTys.clear(); + NodeTys.push_back(MVT::Other); + NodeTys.push_back(MVT::Flag); + Ops.push_back(Chain); + CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin; + Callee.setNode(0); + // Add CTR register as callee so a bctr can be emitted later. + if (isTailCall) + Ops.push_back(DAG.getRegister(PPC::CTR, PtrVT)); + } + + // If this is a direct call, pass the chain and the callee. + if (Callee.getNode()) { + Ops.push_back(Chain); + Ops.push_back(Callee); + } + // If this is a tail call add stack pointer delta. + if (isTailCall) + Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + return CallOpc; +} + +static SDValue LowerCallReturn(SDValue Op, SelectionDAG &DAG, TargetMachine &TM, + CallSDNode *TheCall, SDValue Chain, + SDValue InFlag) { + bool isVarArg = TheCall->isVarArg(); + DebugLoc dl = TheCall->getDebugLoc(); + SmallVector<SDValue, 16> ResultVals; + SmallVector<CCValAssign, 16> RVLocs; + unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv(); + CCState CCRetInfo(CallerCC, isVarArg, TM, RVLocs); + CCRetInfo.AnalyzeCallResult(TheCall, RetCC_PPC); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign &VA = RVLocs[i]; + MVT VT = VA.getValVT(); + assert(VA.isRegLoc() && "Can only return in registers!"); + Chain = DAG.getCopyFromReg(Chain, dl, + VA.getLocReg(), VT, InFlag).getValue(1); + ResultVals.push_back(Chain.getValue(0)); + InFlag = Chain.getValue(2); + } + + // If the function returns void, just return the chain. + if (RVLocs.empty()) + return Chain; + + // Otherwise, merge everything together with a MERGE_VALUES node. + ResultVals.push_back(Chain); + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), + &ResultVals[0], ResultVals.size()); + return Res.getValue(Op.getResNo()); +} + +static +SDValue FinishCall(SelectionDAG &DAG, CallSDNode *TheCall, TargetMachine &TM, + SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, + SDValue Op, SDValue InFlag, SDValue Chain, SDValue &Callee, + int SPDiff, unsigned NumBytes) { + unsigned CC = TheCall->getCallingConv(); + DebugLoc dl = TheCall->getDebugLoc(); + bool isTailCall = TheCall->isTailCall() + && CC == CallingConv::Fast && PerformTailCallOpt; + + std::vector<MVT> NodeTys; + SmallVector<SDValue, 8> Ops; + unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, + isTailCall, RegsToPass, Ops, NodeTys, + TM.getSubtarget<PPCSubtarget>().isSVR4ABI()); + + // When performing tail call optimization the callee pops its arguments off + // the stack. Account for this here so these bytes can be pushed back on in + // PPCRegisterInfo::eliminateCallFramePseudoInstr. + int BytesCalleePops = + (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + // Emit tail call. + if (isTailCall) { + assert(InFlag.getNode() && + "Flag must be set. 
Depend on flag being set in LowerRET"); + Chain = DAG.getNode(PPCISD::TAILCALL, dl, + TheCall->getVTList(), &Ops[0], Ops.size()); + return SDValue(Chain.getNode(), Op.getResNo()); + } + + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(BytesCalleePops, true), + InFlag); + if (TheCall->getValueType(0) != MVT::Other) + InFlag = Chain.getValue(1); + + return LowerCallReturn(Op, DAG, TM, TheCall, Chain, InFlag); +} + +SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, + TargetMachine &TM) { + // See PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4() for a description + // of the SVR4 ABI stack frame layout. CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); SDValue Chain = TheCall->getChain(); bool isVarArg = TheCall->isVarArg(); unsigned CC = TheCall->getCallingConv(); + assert((CC == CallingConv::C || + CC == CallingConv::Fast) && "Unknown calling convention!"); bool isTailCall = TheCall->isTailCall() && CC == CallingConv::Fast && PerformTailCallOpt; SDValue Callee = TheCall->getCallee(); - unsigned NumOps = TheCall->getNumArgs(); DebugLoc dl = TheCall->getDebugLoc(); - bool isMachoABI = Subtarget.isMachoABI(); - bool isELF32_ABI = Subtarget.isELF32_ABI(); + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned PtrByteSize = 4; + + MachineFunction &MF = DAG.getMachineFunction(); + + // Mark this function as potentially containing a function that contains a + // tail call. As a consequence the frame pointer will be used for dynamicalloc + // and restoring the callers stack pointer in this functions epilog. This is + // done because by tail calling the called function might overwrite the value + // in this function's (MF) stack pointer stack slot 0(SP). + if (PerformTailCallOpt && CC==CallingConv::Fast) + MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); + + // Count how many bytes are to be pushed on the stack, including the linkage + // area, parameter list area and the part of the local variable space which + // contains copies of aggregates which are passed by value. + + // Assign locations to all of the outgoing arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + + // Reserve space for the linkage area on the stack. + CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize); + + if (isVarArg) { + // Handle fixed and variable vector arguments differently. + // Fixed vector arguments go into registers as long as registers are + // available. Variable vector arguments always go into memory. + unsigned NumArgs = TheCall->getNumArgs(); + unsigned NumFixedArgs = TheCall->getNumFixedArgs(); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = TheCall->getArg(i).getValueType(); + ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + bool Result; + + if (i < NumFixedArgs) { + Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, + CCInfo); + } else { + Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, + ArgFlags, CCInfo); + } + + if (Result) { + cerr << "Call operand #" << i << " has unhandled type " + << ArgVT.getMVTString() << "\n"; + abort(); + } + } + } else { + // All arguments are treated the same. + CCInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4); + } + + // Assign locations to all of the outgoing aggregate by value arguments. 
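For vararg SVR4 calls, the loop above runs the first NumFixedArgs operands through the normal CC_PPC_SVR4 rules and the trailing variadic operands through CC_PPC_SVR4_VarArg, which, per the comment, keeps variadic vectors out of registers; the by-value bookkeeping continues below. The index split in isolation, with made-up argument counts:

#include <cstdio>

int main() {
  unsigned NumArgs = 5, NumFixedArgs = 2;   // a call with 2 fixed, 3 variadic args
  for (unsigned i = 0; i != NumArgs; ++i)
    std::printf("operand %u -> %s\n", i,
                i < NumFixedArgs ? "CC_PPC_SVR4" : "CC_PPC_SVR4_VarArg");
  return 0;
}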
+ SmallVector<CCValAssign, 16> ByValArgLocs; + CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs); + + // Reserve stack space for the allocations in CCInfo. + CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); + + CCByValInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4_ByVal); + + // Size of the linkage area, parameter list area and the part of the local + // space variable where copies of aggregates which are passed by value are + // stored. + unsigned NumBytes = CCByValInfo.getNextStackOffset(); + + // Calculate by how many bytes the stack has to be adjusted in case of tail + // call optimization. + int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); + + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog pass + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + SDValue CallSeqStart = Chain; + + // Load the return address and frame pointer so it can be moved somewhere else + // later. + SDValue LROp, FPOp; + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, + dl); + + // Set up a copy of the stack pointer for use loading and storing any + // arguments that may not fit in the registers available for argument + // passing. + SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<TailCallArgumentInfo, 8> TailCallArguments; + SmallVector<SDValue, 8> MemOpChains; + + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, j = 0, e = ArgLocs.size(); + i != e; + ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = TheCall->getArg(i); + ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); + + if (Flags.isByVal()) { + // Argument is an aggregate which is passed by value, thus we need to + // create a copy of it in the local variable space of the current stack + // frame (which is the stack frame of the caller) and pass the address of + // this copy to the callee. + assert((j < ByValArgLocs.size()) && "Index out of bounds!"); + CCValAssign &ByValVA = ByValArgLocs[j++]; + assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); + + // Memory reserved in the local variable space of the callers stack frame. + unsigned LocMemOffset = ByValVA.getLocMemOffset(); + + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + + // Create a copy of the argument in the local area of the current + // stack frame. + SDValue MemcpyCall = + CreateCopyOfByValArgument(Arg, PtrOff, + CallSeqStart.getNode()->getOperand(0), + Flags, DAG, dl); + + // This must go outside the CALLSEQ_START..END. + SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, + CallSeqStart.getNode()->getOperand(1)); + DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), + NewCallSeqStart.getNode()); + Chain = CallSeqStart = NewCallSeqStart; + + // Pass the address of the aggregate copy on the stack either in a + // physical register or in the parameter list area of the current stack + // frame to the callee. + Arg = PtrOff; + } + + if (VA.isRegLoc()) { + // Put argument in a physical register. + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + // Put argument in the parameter list area of the current stack frame. 
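The by-value handling above builds a copy of the aggregate in the caller's frame (a memcpy hoisted outside CALLSEQ_START..END) and then passes the copy's address; the register or parameter-area placement of that pointer continues below. A plain C++ analogy of the same contract, not the lowering code itself:

#include <cstdio>
#include <cstring>

struct Big { char bytes[64]; };

// The callee may freely scribble on its parameter...
void callee(Big *byvalCopy) { byvalCopy->bytes[0] = 'X'; }

// ...so the caller hands it a private copy made in the caller's own frame.
void caller(const Big &original) {
  Big copy;                                    // space in the caller's frame
  std::memcpy(&copy, &original, sizeof(Big));  // the CreateCopyOfByValArgument step
  callee(&copy);                               // pass the address of the copy
}

int main() {
  Big b = {};
  caller(b);
  std::printf("original untouched: %d\n", b.bytes[0]);  // still 0
  return 0;
}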
+ assert(VA.isMemLoc()); + unsigned LocMemOffset = VA.getLocMemOffset(); + + if (!isTailCall) { + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + PseudoSourceValue::getStack(), LocMemOffset)); + } else { + // Calculate and remember argument location. + CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, + TailCallArguments); + } + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // Set CR6 to true if this is a vararg call. + if (isVarArg) { + SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0); + Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag); + InFlag = Chain.getValue(1); + } + + if (isTailCall) { + PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, + false, TailCallArguments); + } + + return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee, + SPDiff, NumBytes); +} + +SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, + TargetMachine &TM) { + CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); + SDValue Chain = TheCall->getChain(); + bool isVarArg = TheCall->isVarArg(); + unsigned CC = TheCall->getCallingConv(); + bool isTailCall = TheCall->isTailCall() + && CC == CallingConv::Fast && PerformTailCallOpt; + SDValue Callee = TheCall->getCallee(); + unsigned NumOps = TheCall->getNumArgs(); + DebugLoc dl = TheCall->getDebugLoc(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; @@ -2144,10 +2745,6 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); - // args_to_use will accumulate outgoing args for the PPCISD::CALL case in - // SelectExpr to use to put the arguments in the appropriate registers. - std::vector<SDValue> args_to_use; - // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is @@ -2162,8 +2759,8 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. unsigned NumBytes = - CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC, - TheCall, nAltivecParamsAtEnd); + CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CC, TheCall, + nAltivecParamsAtEnd); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. @@ -2177,7 +2774,8 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // Load the return address and frame pointer so it can be move somewhere else // later. 
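One detail of the vararg handling above: the "CR6" the comment refers to is CR bit 6, which lives in CR field 1 as its EQ bit, hence the CRSET result being copied into PPC::CR1EQ. A quick check of that bit numbering:

#include <cassert>
#include <cstdio>

int main() {
  // Each CR field holds four bits: LT=0, GT=1, EQ=2, SO/UN=3.
  int field = 1, bitInField = 2;          // CR1, EQ
  int globalBit = field * 4 + bitInField;
  assert(globalBit == 6);                 // the "CR6" vararg indicator bit
  std::printf("CR1EQ is CR bit %d\n", globalBit);
  return 0;
}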
SDValue LROp, FPOp; - Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); + Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, + dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument @@ -2192,7 +2790,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. - unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); + unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; static const unsigned GPR_32[] = { // 32-bit registers. @@ -2210,12 +2808,12 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned NumGPRs = array_lengthof(GPR_32); - const unsigned NumFPRs = isMachoABI ? 13 : 8; - const unsigned NumVRs = array_lengthof( VR); + const unsigned NumFPRs = 13; + const unsigned NumVRs = array_lengthof(VR); const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; - std::vector<std::pair<unsigned, SDValue> > RegsToPass; + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<TailCallArgumentInfo, 8> TailCallArguments; SmallVector<SDValue, 8> MemOpChains; @@ -2223,19 +2821,12 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, bool inMem = false; SDValue Arg = TheCall->getArg(i); ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); - // See if next argument requires stack alignment in ELF - bool Align = Flags.isSplit(); // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; - // Stack align in ELF 32 - if (isELF32_ABI && Align) - PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize, - StackPtr.getValueType()); - else - PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); @@ -2246,11 +2837,9 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); } - // FIXME Elf untested, what are alignment rules? // FIXME memcpy is used way more than necessary. Correctness first. if (Flags.isByVal()) { unsigned Size = Flags.getByValSize(); - if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2); if (Size==1 || Size==2) { // Very small objects are passed right-justified. // Everything else is passed left-justified. @@ -2260,14 +2849,14 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, NULL, 0, VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); - if (isMachoABI) - ArgOffset += PtrByteSize; + + ArgOffset += PtrByteSize; } else { SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, CallSeqStart.getNode()->getOperand(0), - Flags, DAG, Size, dl); + Flags, DAG, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1)); @@ -2283,7 +2872,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, // registers. 
(This is not what the doc says.) SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), - Flags, DAG, Size, dl); + Flags, DAG, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, CallSeqStart.getNode()->getOperand(1)); @@ -2297,8 +2886,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); - if (isMachoABI) - ArgOffset += PtrByteSize; + ArgOffset += PtrByteSize; } else { ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; break; @@ -2311,8 +2899,6 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, default: assert(0 && "Unexpected ValueType for argument!"); case MVT::i32: case MVT::i64: - // Double word align in ELF - if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2); if (GPR_idx != NumGPRs) { RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); } else { @@ -2321,13 +2907,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, TailCallArguments, dl); inMem = true; } - if (inMem || isMachoABI) { - // Stack align in ELF - if (isELF32_ABI && Align) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - - ArgOffset += PtrByteSize; - } + ArgOffset += PtrByteSize; break; case MVT::f32: case MVT::f64: @@ -2342,28 +2922,24 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, if (GPR_idx != NumGPRs) { SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); MemOpChains.push_back(Load.getValue(1)); - if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], - Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); MemOpChains.push_back(Load.getValue(1)); - if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], - Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } } else { // If we have any FPRs remaining, we may also have GPRs remaining. // Args passed in FPRs consume either 1 (f32) or 2 (f64) available // GPRs. - if (isMachoABI) { - if (GPR_idx != NumGPRs) - ++GPR_idx; - if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && - !isPPC64) // PPC64 has 64-bit GPR's obviously :) - ++GPR_idx; - } + if (GPR_idx != NumGPRs) + ++GPR_idx; + if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && + !isPPC64) // PPC64 has 64-bit GPR's obviously :) + ++GPR_idx; } } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, @@ -2371,15 +2947,10 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, TailCallArguments, dl); inMem = true; } - if (inMem || isMachoABI) { - // Stack align in ELF - if (isELF32_ABI && Align) - ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; - if (isPPC64) - ArgOffset += 8; - else - ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; - } + if (isPPC64) + ArgOffset += 8; + else + ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; break; case MVT::v4f32: case MVT::v4i32: @@ -2475,148 +3046,13 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG, InFlag = Chain.getValue(1); } - // With the ELF 32 ABI, set CR6 to true if this is a vararg call. 
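In the Darwin argument loop above, a floating-point argument that lands in an FPR still consumes the GPR slot or slots that would otherwise have carried it (one for f32, two for f64 on 32-bit), so later integer arguments start further along the GPR list. The bookkeeping in isolation, using the register counts from the hunk (8 argument GPRs starting at r3, 13 FPRs):

#include <cstdio>

int main() {
  const unsigned NumGPRs = 8, NumFPRs = 13;
  unsigned GPR_idx = 0, FPR_idx = 0;
  bool isPPC64 = false;

  // First argument: f64 -> an FPR, plus two shadowed GPR slots on 32-bit.
  if (FPR_idx != NumFPRs) ++FPR_idx;
  if (GPR_idx != NumGPRs) ++GPR_idx;
  if (GPR_idx != NumGPRs && !isPPC64) ++GPR_idx;

  // Second argument: i32 -> the next unshadowed GPR.
  std::printf("the i32 goes in GPR slot %u (r%u)\n", GPR_idx, 3 + GPR_idx);
  return 0;
}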
- if (isVarArg && isELF32_ABI) { - SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0); - Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag); - InFlag = Chain.getValue(1); - } - - // Emit a sequence of copyto/copyfrom virtual registers for arguments that - // might overwrite each other in case of tail call optimization. if (isTailCall) { - SmallVector<SDValue, 8> MemOpChains2; - // Do not flag preceeding copytoreg stuff together with the following stuff. - InFlag = SDValue(); - StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, - MemOpChains2, dl); - if (!MemOpChains2.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains2[0], MemOpChains2.size()); - - // Store the return address to the appropriate stack slot. - Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, - isPPC64, isMachoABI, dl); - } - - // Emit callseq_end just before tailcall node. - if (isTailCall) { - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); - InFlag = Chain.getValue(1); - } - - std::vector<MVT> NodeTys; - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. - - SmallVector<SDValue, 8> Ops; - unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF; - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType()); - else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); - else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) - // If this is an absolute destination address, use the munged value. - Callee = SDValue(Dest, 0); - else { - // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair - // to do the call, we can't use PPCISD::CALL. - SDValue MTCTROps[] = {Chain, Callee, InFlag}; - Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, - 2 + (InFlag.getNode() != 0)); - InFlag = Chain.getValue(1); - - // Copy the callee address into R12/X12 on darwin. - if (isMachoABI) { - unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12; - Chain = DAG.getCopyToReg(Chain, dl, Reg, Callee, InFlag); - InFlag = Chain.getValue(1); - } - - NodeTys.clear(); - NodeTys.push_back(MVT::Other); - NodeTys.push_back(MVT::Flag); - Ops.push_back(Chain); - CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF; - Callee.setNode(0); - // Add CTR register as callee so a bctr can be emitted later. - if (isTailCall) - Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy())); - } - - // If this is a direct call, pass the chain and the callee. - if (Callee.getNode()) { - Ops.push_back(Chain); - Ops.push_back(Callee); + PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, + FPOp, true, TailCallArguments); } - // If this is a tail call add stack pointer delta. - if (isTailCall) - Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); - // Add argument registers to the end of the list so that they are known live - // into the call. 
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // When performing tail call optimization the callee pops its arguments off - // the stack. Account for this here so these bytes can be pushed back on in - // PPCRegisterInfo::eliminateCallFramePseudoInstr. - int BytesCalleePops = - (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; - - if (InFlag.getNode()) - Ops.push_back(InFlag); - - // Emit tail call. - if (isTailCall) { - assert(InFlag.getNode() && - "Flag must be set. Depend on flag being set in LowerRET"); - Chain = DAG.getNode(PPCISD::TAILCALL, dl, - TheCall->getVTList(), &Ops[0], Ops.size()); - return SDValue(Chain.getNode(), Op.getResNo()); - } - - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(BytesCalleePops, true), - InFlag); - if (TheCall->getValueType(0) != MVT::Other) - InFlag = Chain.getValue(1); - - SmallVector<SDValue, 16> ResultVals; - SmallVector<CCValAssign, 16> RVLocs; - unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv(); - CCState CCInfo(CallerCC, isVarArg, TM, RVLocs); - CCInfo.AnalyzeCallResult(TheCall, RetCC_PPC); - - // Copy all of the result registers out of their specified physreg. - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - CCValAssign &VA = RVLocs[i]; - MVT VT = VA.getValVT(); - assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyFromReg(Chain, dl, - VA.getLocReg(), VT, InFlag).getValue(1); - ResultVals.push_back(Chain.getValue(0)); - InFlag = Chain.getValue(2); - } - - // If the function returns void, just return the chain. - if (RVLocs.empty()) - return Chain; - - // Otherwise, merge everything together with a MERGE_VALUES node. - ResultVals.push_back(Chain); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), - &ResultVals[0], ResultVals.size()); - return Res.getValue(Op.getResNo()); + return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee, + SPDiff, NumBytes); } SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG, @@ -2716,7 +3152,7 @@ SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool IsPPC64 = PPCSubTarget.isPPC64(); - bool isMachoABI = PPCSubTarget.isMachoABI(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be @@ -2727,7 +3163,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!RASI) { // Find out what the fix offset of the frame pointer save area. - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset); // Save the result. 
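getReturnAddrFrameIndex above (and getFramePointerFrameIndex just below) follows a create-on-first-use pattern: compute the ABI-fixed offset, create a single fixed stack object for it, and cache the index for later queries. A minimal sketch of that pattern, with a placeholder index standing in for a real MachineFrameInfo frame index:

#include <cstdio>

struct SaveSlotCache {
  int Index = 0;                        // 0 means "not created yet"

  int get(int abiOffset, int slotSize) {
    if (!Index) {
      std::printf("create %d-byte fixed object at offset %d\n",
                  slotSize, abiOffset);
      Index = -1;                       // placeholder for the new frame index
    }
    return Index;
  }
};

int main() {
  SaveSlotCache RASI;                   // return-address save index
  RASI.get(/*abiOffset=*/8, /*slotSize=*/4);
  RASI.get(8, 4);                       // second query reuses the cached index
  return 0;
}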
@@ -2740,7 +3176,7 @@ SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool IsPPC64 = PPCSubTarget.isPPC64(); - bool isMachoABI = PPCSubTarget.isMachoABI(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be @@ -2751,7 +3187,8 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!FPSI) { // Find out what the fix offset of the frame pointer save area. - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI); + int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, + isDarwinABI); // Allocate the frame index for frame pointer save area. FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); @@ -3729,12 +4166,22 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); case ISD::FORMAL_ARGUMENTS: - return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex, - VarArgsStackOffset, VarArgsNumGPR, - VarArgsNumFPR, PPCSubTarget); + if (PPCSubTarget.isSVR4ABI()) { + return LowerFORMAL_ARGUMENTS_SVR4(Op, DAG, VarArgsFrameIndex, + VarArgsStackOffset, VarArgsNumGPR, + VarArgsNumFPR, PPCSubTarget); + } else { + return LowerFORMAL_ARGUMENTS_Darwin(Op, DAG, VarArgsFrameIndex, + PPCSubTarget); + } - case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget, - getTargetMachine()); + case ISD::CALL: + if (PPCSubTarget.isSVR4ABI()) { + return LowerCALL_SVR4(Op, DAG, PPCSubTarget, getTargetMachine()); + } else { + return LowerCALL_Darwin(Op, DAG, PPCSubTarget, getTargetMachine()); + } + case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); case ISD::DYNAMIC_STACKALLOC: @@ -4878,3 +5325,13 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The PowerPC target isn't yet aware of offsets. return false; } + +MVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const { + if (this->PPCSubTarget.isPPC64()) { + return MVT::i64; + } else { + return MVT::i32; + } +} diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index b6d046f..962bbb1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -82,7 +82,7 @@ namespace llvm { STD_32, /// CALL - A direct function call. - CALL_Macho, CALL_ELF, + CALL_Darwin, CALL_SVR4, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. @@ -90,7 +90,7 @@ namespace llvm { /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a /// BCTRL instruction. - BCTRL_Macho, BCTRL_ELF, + BCTRL_Darwin, BCTRL_SVR4, /// Return with a flag operand, matched by 'blr' RET_FLAG, @@ -223,7 +223,6 @@ namespace llvm { // register for parameter passing. unsigned VarArgsNumFPR; // Index of the first unused double // register for parameter passing. - int ReturnAddrIndex; // FrameIndex for return slot. 
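The new getOptimalMemOpType above simply reports the natural GPR width, so inline memcpy/memset expansion works in 8-byte chunks on ppc64 and 4-byte chunks on ppc32. A rough picture of the resulting chunking; the actual expansion heuristics live elsewhere in SelectionDAG:

#include <cstdio>

int main() {
  bool isPPC64 = true;
  unsigned chunk = isPPC64 ? 8u : 4u;   // MVT::i64 vs MVT::i32
  unsigned size = 20;                   // bytes to copy
  std::printf("%u bytes -> %u chunks of %u + %u tail bytes\n",
              size, size / chunk, chunk, size % chunk);
  return 0;
}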
const PPCSubtarget &PPCSubTarget; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -336,6 +335,13 @@ namespace llvm { SelectionDAG &DAG) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align, + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const; + + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; @@ -346,6 +352,7 @@ namespace llvm { SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, + bool isDarwinABI, DebugLoc dl); SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); @@ -363,14 +370,19 @@ namespace llvm { SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex, int VarArgsStackOffset, unsigned VarArgsNumGPR, unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget); - SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, - int &VarArgsFrameIndex, - int &VarArgsStackOffset, - unsigned &VarArgsNumGPR, - unsigned &VarArgsNumFPR, - const PPCSubtarget &Subtarget); - SDValue LowerCALL(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, TargetMachine &TM); + SDValue LowerFORMAL_ARGUMENTS_SVR4(SDValue Op, SelectionDAG &DAG, + int &VarArgsFrameIndex, + int &VarArgsStackOffset, + unsigned &VarArgsNumGPR, + unsigned &VarArgsNumFPR, + const PPCSubtarget &Subtarget); + SDValue LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, SelectionDAG &DAG, + int &VarArgsFrameIndex, + const PPCSubtarget &Subtarget); + SDValue LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, TargetMachine &TM); + SDValue LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, TargetMachine &TM); SDValue LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM); SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 417c8ed..3823e53 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -61,7 +61,7 @@ let Defs = [LR8] in def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>, PPC970_Unit_BRU; -// Macho ABI Calls. +// Darwin ABI Calls. let isCall = 1, PPC970_Unit = 7, // All calls clobber the PPC64 non-callee saved registers. Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, @@ -71,22 +71,22 @@ let isCall = 1, PPC970_Unit = 7, CR0,CR1,CR5,CR6,CR7] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL8_Macho : IForm<18, 0, 1, - (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA8_Macho : IForm<18, 1, 1, - (outs), (ins aaddr:$func, variable_ops), - "bla $func", BrB, [(PPCcall_Macho (i64 imm:$func))]>; + def BL8_Darwin : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. 
+ def BLA8_Darwin : IForm<18, 1, 1, + (outs), (ins aaddr:$func, variable_ops), + "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>; } let Uses = [CTR8, RM] in { - def BCTRL8_Macho : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), - "bctrl", BrB, - [(PPCbctrl_Macho)]>, Requires<[In64BitMode]>; + def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>; } } -// ELF 64 ABI Calls = Macho ABI Calls +// ELF 64 ABI Calls = Darwin ABI Calls // Used to define BL8_ELF and BLA8_ELF let isCall = 1, PPC970_Unit = 7, // All calls clobber the PPC64 non-callee saved registers. @@ -102,26 +102,26 @@ let isCall = 1, PPC970_Unit = 7, "bl $func", BrB, []>; // See Pat patterns below. def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), - "bla $func", BrB, [(PPCcall_ELF (i64 imm:$func))]>; + "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; } let Uses = [CTR8, RM] in { def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins variable_ops), "bctrl", BrB, - [(PPCbctrl_ELF)]>, Requires<[In64BitMode]>; + [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>; } } // Calls -def : Pat<(PPCcall_Macho (i64 tglobaladdr:$dst)), - (BL8_Macho tglobaladdr:$dst)>; -def : Pat<(PPCcall_Macho (i64 texternalsym:$dst)), - (BL8_Macho texternalsym:$dst)>; +def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)), + (BL8_Darwin tglobaladdr:$dst)>; +def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), + (BL8_Darwin texternalsym:$dst)>; -def : Pat<(PPCcall_ELF (i64 tglobaladdr:$dst)), +def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), (BL8_ELF tglobaladdr:$dst)>; -def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)), +def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; // Atomic operations diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 778f034..87c612a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -444,21 +444,29 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // not cause any bug. If we need other uses of CR bits, the following // code may be invalid. 
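The StoreRegToStackSlot and LoadRegFromStackSlot hunks below replace range tests of the form "SrcReg >= PPC::CR0LT || SrcReg <= PPC::CR0UN" with explicit equality checks. With "||" those tests are tautologies (any register number satisfies at least one of the two bounds), so every CR bit mapped to CR0. A small demonstration with stand-in enumerator values, not the real PPC register enum:

#include <cassert>
#include <cstdio>

enum Reg { CR0LT = 10, CR0GT, CR0EQ, CR0UN, CR1LT, CR1GT, CR1EQ, CR1UN };

int main() {
  Reg SrcReg = CR1GT;                                   // belongs to CR1
  bool oldTest = SrcReg >= CR0LT || SrcReg <= CR0UN;    // always true
  bool newTest = SrcReg == CR0LT || SrcReg == CR0GT ||
                 SrcReg == CR0EQ || SrcReg == CR0UN;    // correctly false
  assert(oldTest && !newTest);
  std::printf("old test maps CR1GT to CR0: %d, new test does: %d\n",
              oldTest, newTest);
  return 0;
}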
unsigned Reg = 0; - if (SrcReg >= PPC::CR0LT || SrcReg <= PPC::CR0UN) + if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT || + SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN) Reg = PPC::CR0; - else if (SrcReg >= PPC::CR1LT || SrcReg <= PPC::CR1UN) + else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT || + SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN) Reg = PPC::CR1; - else if (SrcReg >= PPC::CR2LT || SrcReg <= PPC::CR2UN) + else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT || + SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN) Reg = PPC::CR2; - else if (SrcReg >= PPC::CR3LT || SrcReg <= PPC::CR3UN) + else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT || + SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN) Reg = PPC::CR3; - else if (SrcReg >= PPC::CR4LT || SrcReg <= PPC::CR4UN) + else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT || + SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN) Reg = PPC::CR4; - else if (SrcReg >= PPC::CR5LT || SrcReg <= PPC::CR5UN) + else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT || + SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN) Reg = PPC::CR5; - else if (SrcReg >= PPC::CR6LT || SrcReg <= PPC::CR6UN) + else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT || + SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN) Reg = PPC::CR6; - else if (SrcReg >= PPC::CR7LT || SrcReg <= PPC::CR7UN) + else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT || + SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN) Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, @@ -587,21 +595,29 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, } else if (RC == PPC::CRBITRCRegisterClass) { unsigned Reg = 0; - if (DestReg >= PPC::CR0LT || DestReg <= PPC::CR0UN) + if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT || + DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN) Reg = PPC::CR0; - else if (DestReg >= PPC::CR1LT || DestReg <= PPC::CR1UN) + else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT || + DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN) Reg = PPC::CR1; - else if (DestReg >= PPC::CR2LT || DestReg <= PPC::CR2UN) + else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT || + DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN) Reg = PPC::CR2; - else if (DestReg >= PPC::CR3LT || DestReg <= PPC::CR3UN) + else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT || + DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN) Reg = PPC::CR3; - else if (DestReg >= PPC::CR4LT || DestReg <= PPC::CR4UN) + else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT || + DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN) Reg = PPC::CR4; - else if (DestReg >= PPC::CR5LT || DestReg <= PPC::CR5UN) + else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT || + DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN) Reg = PPC::CR5; - else if (DestReg >= PPC::CR6LT || DestReg <= PPC::CR6UN) + else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT || + DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN) Reg = PPC::CR6; - else if (DestReg >= PPC::CR7LT || DestReg <= PPC::CR7UN) + else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT || + DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN) Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, @@ -691,16 +707,21 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW)) - .addReg(InReg, 
getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } else if ((Opc == PPC::OR8 && @@ -708,48 +729,63 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD)) - .addReg(InReg, getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } else if (Opc == PPC::FMRD) { if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD)) - .addReg(InReg, getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } else if (Opc == PPC::FMRS) { if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS)) - .addReg(InReg, getKillRegState(isKill)), + .addReg(InReg, + getKillRegState(isKill) | + getUndefRegState(isUndef)), FrameIndex); } else { // move -> load unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS)) .addReg(OutReg, RegState::Define | - getDeadRegState(isDead)), + getDeadRegState(isDead) | + getUndefRegState(isUndef)), FrameIndex); } } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 772e25a..7af59a2 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -107,17 +107,17 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; -def PPCcall_Macho : SDNode<"PPCISD::CALL_Macho", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def PPCcall_ELF : SDNode<"PPCISD::CALL_ELF", SDT_PPCCall, +def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def 
PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def PPCbctrl_Macho : SDNode<"PPCISD::BCTRL_Macho", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; -def PPCbctrl_ELF : SDNode<"PPCISD::BCTRL_ELF", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; @@ -412,7 +412,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>; } -// Macho ABI Calls. +// Darwin ABI Calls. let isCall = 1, PPC970_Unit = 7, // All calls clobber the non-callee saved registers... Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, @@ -424,26 +424,26 @@ let isCall = 1, PPC970_Unit = 7, CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL_Macho : IForm<18, 0, 1, - (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA_Macho : IForm<18, 1, 1, + def BL_Darwin : IForm<18, 0, 1, + (outs), (ins calltarget:$func, variable_ops), + "bl $func", BrB, []>; // See Pat patterns below. + def BLA_Darwin : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), - "bla $func", BrB, [(PPCcall_Macho (i32 imm:$func))]>; + "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>; } let Uses = [CTR, RM] in { - def BCTRL_Macho : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), - "bctrl", BrB, - [(PPCbctrl_Macho)]>, Requires<[In32BitMode]>; + def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>; } } -// ELF ABI Calls. +// SVR4 ABI Calls. let isCall = 1, PPC970_Unit = 7, // All calls clobber the non-callee saved registers... - Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, - F0,F1,F2,F3,F4,F5,F6,F7,F8, + Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, + F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, LR,CTR, CR0,CR1,CR5,CR6,CR7, @@ -451,19 +451,19 @@ let isCall = 1, PPC970_Unit = 7, CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL_ELF : IForm<18, 0, 1, + def BL_SVR4 : IForm<18, 0, 1, (outs), (ins calltarget:$func, variable_ops), "bl $func", BrB, []>; // See Pat patterns below. 
- def BLA_ELF : IForm<18, 1, 1, + def BLA_SVR4 : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), "bla $func", BrB, - [(PPCcall_ELF (i32 imm:$func))]>; + [(PPCcall_SVR4 (i32 imm:$func))]>; } let Uses = [CTR, RM] in { - def BCTRL_ELF : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins variable_ops), - "bctrl", BrB, - [(PPCbctrl_ELF)]>, Requires<[In32BitMode]>; + def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1, + (outs), (ins variable_ops), + "bctrl", BrB, + [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>; } } @@ -1389,14 +1389,14 @@ def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm), (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; // Calls -def : Pat<(PPCcall_Macho (i32 tglobaladdr:$dst)), - (BL_Macho tglobaladdr:$dst)>; -def : Pat<(PPCcall_Macho (i32 texternalsym:$dst)), - (BL_Macho texternalsym:$dst)>; -def : Pat<(PPCcall_ELF (i32 tglobaladdr:$dst)), - (BL_ELF tglobaladdr:$dst)>; -def : Pat<(PPCcall_ELF (i32 texternalsym:$dst)), - (BL_ELF texternalsym:$dst)>; +def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)), + (BL_Darwin tglobaladdr:$dst)>; +def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)), + (BL_Darwin texternalsym:$dst)>; +def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)), + (BL_SVR4 tglobaladdr:$dst)>; +def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)), + (BL_SVR4 texternalsym:$dst)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 035647e..7486d74 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -354,7 +354,7 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 JCE.emitWordBE(0xf9610060); // std r11, 96(r1) - } else if (TM.getSubtargetImpl()->isMachoABI()){ + } else if (TM.getSubtargetImpl()->isDarwinABI()){ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 JCE.emitWordBE(0x91610028); // stw r11, 40(r1) diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index cb31506..97b1c57 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -149,7 +149,7 @@ const TargetRegisterClass *PPCRegisterInfo::getPointerRegClass() const { const unsigned* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // 32-bit Darwin calling convention. 
- static const unsigned Macho32_CalleeSavedRegs[] = { + static const unsigned Darwin32_CalleeSavedRegs[] = { PPC::R13, PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, PPC::R20, PPC::R21, PPC::R22, PPC::R23, @@ -174,15 +174,13 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR, 0 }; - static const unsigned ELF32_CalleeSavedRegs[] = { - PPC::R13, PPC::R14, PPC::R15, + static const unsigned SVR4_CalleeSavedRegs[] = { + PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, PPC::R20, PPC::R21, PPC::R22, PPC::R23, PPC::R24, PPC::R25, PPC::R26, PPC::R27, PPC::R28, PPC::R29, PPC::R30, PPC::R31, - PPC::F9, - PPC::F10, PPC::F11, PPC::F12, PPC::F13, PPC::F14, PPC::F15, PPC::F16, PPC::F17, PPC::F18, PPC::F19, PPC::F20, PPC::F21, PPC::F22, PPC::F23, PPC::F24, PPC::F25, @@ -190,6 +188,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::F30, PPC::F31, PPC::CR2, PPC::CR3, PPC::CR4, + + PPC::VRSAVE, + PPC::V20, PPC::V21, PPC::V22, PPC::V23, PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31, @@ -201,7 +202,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR, 0 }; // 64-bit Darwin calling convention. - static const unsigned Macho64_CalleeSavedRegs[] = { + static const unsigned Darwin64_CalleeSavedRegs[] = { PPC::X14, PPC::X15, PPC::X16, PPC::X17, PPC::X18, PPC::X19, PPC::X20, PPC::X21, PPC::X22, PPC::X23, @@ -226,18 +227,17 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR8, 0 }; - if (Subtarget.isMachoABI()) - return Subtarget.isPPC64() ? Macho64_CalleeSavedRegs : - Macho32_CalleeSavedRegs; + if (Subtarget.isDarwinABI()) + return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs : + Darwin32_CalleeSavedRegs; - // ELF 32. - return ELF32_CalleeSavedRegs; + return SVR4_CalleeSavedRegs; } const TargetRegisterClass* const* PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - // 32-bit Macho calling convention. - static const TargetRegisterClass * const Macho32_CalleeSavedRegClasses[] = { + // 32-bit Darwin calling convention. 
+ static const TargetRegisterClass * const Darwin32_CalleeSavedRegClasses[] = { &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, @@ -266,15 +266,13 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::GPRCRegClass, 0 }; - static const TargetRegisterClass * const ELF32_CalleeSavedRegClasses[] = { - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, + static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = { + &PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, @@ -283,6 +281,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, + &PPC::VRSAVERCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, @@ -297,8 +297,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::GPRCRegClass, 0 }; - // 64-bit Macho calling convention. - static const TargetRegisterClass * const Macho64_CalleeSavedRegClasses[] = { + // 64-bit Darwin calling convention. + static const TargetRegisterClass * const Darwin64_CalleeSavedRegClasses[] = { &PPC::G8RCRegClass,&PPC::G8RCRegClass, &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, @@ -327,12 +327,11 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::G8RCRegClass, 0 }; - if (Subtarget.isMachoABI()) - return Subtarget.isPPC64() ? Macho64_CalleeSavedRegClasses : - Macho32_CalleeSavedRegClasses; + if (Subtarget.isDarwinABI()) + return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses : + Darwin32_CalleeSavedRegClasses; - // ELF 32. - return ELF32_CalleeSavedRegClasses; + return SVR4_CalleeSavedRegClasses; } // needsFP - Return true if the specified function should have a dedicated frame @@ -358,10 +357,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::LR8); Reserved.set(PPC::RM); - // In Linux, r2 is reserved for the OS. - if (!Subtarget.isDarwin()) - Reserved.set(PPC::R2); - + // The SVR4 ABI reserves r2 and r13 + if (Subtarget.isSVR4ABI()) { + Reserved.set(PPC::R2); // System-reserved register + Reserved.set(PPC::R13); // Small Data Area pointer register + } + // On PPC64, r13 is the thread pointer. Never allocate this register. Note // that this is over conservative, as it also prevents allocation of R31 when // the FP is not needed. 
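The getReservedRegs change above keeps r2 and r13 away from the allocator only when targeting SVR4. A sketch of that kind of reservation set; the register numbers and the rest of the reserved set are illustrative, not the real PPC enum values:

#include <bitset>
#include <cstdio>

int main() {
  std::bitset<32> Reserved;             // stand-in for the BitVector of GPRs
  const unsigned R1 = 1, R2 = 2, R13 = 13;
  bool isSVR4ABI = true;

  Reserved.set(R1);                     // stack pointer, always off limits
  if (isSVR4ABI) {
    Reserved.set(R2);                   // system-reserved register
    Reserved.set(R13);                  // small data area pointer
  }
  std::printf("%zu reserved GPRs\n", Reserved.count());
  return 0;
}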
@@ -909,6 +910,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { // don't have a frame pointer, calls, or dynamic alloca then we do not need // to adjust the stack pointer (we fit in the Red Zone). bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); + // FIXME SVR4 The SVR4 ABI has no red zone. if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. @@ -925,7 +927,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { // Maximum call frame needs to be at least big enough for linkage and 8 args. unsigned minCallFrameSize = PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(), - Subtarget.isMachoABI()); + Subtarget.isDarwinABI()); maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); // If we have dynamic alloca then maxCallFrameSize needs to be aligned so @@ -958,16 +960,15 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); bool IsPPC64 = Subtarget.isPPC64(); - bool IsELF32_ABI = Subtarget.isELF32_ABI(); - bool IsMachoABI = Subtarget.isMachoABI(); + bool IsSVR4ABI = Subtarget.isSVR4ABI(); + bool isDarwinABI = Subtarget.isDarwinABI(); MachineFrameInfo *MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. - if (!FPSI && (NoFramePointerElim || MFI->hasVarSizedObjects()) && - IsELF32_ABI) { + if (!FPSI && needsFP(MF) && IsSVR4ABI) { // Find out what the fix offset of the frame pointer save area. int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, - IsMachoABI); + isDarwinABI); // Allocate the frame index for frame pointer save area. FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); // Save the result. @@ -976,11 +977,10 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; - if (PerformTailCallOpt && (TCSPDelta=FI->getTailCallSPDelta()) < 0) { - int AddFPOffsetAmount = IsELF32_ABI ? -4 : 0; - MF.getFrameInfo()->CreateFixedObject( -1 * TCSPDelta, - AddFPOffsetAmount + TCSPDelta); + if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { + MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta); } + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or // a large stack, which will require scavenging a register to materialize a // large offset. @@ -999,6 +999,170 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } void +PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) + const { + // Early exit if not using the SVR4 ABI. + if (!Subtarget.isSVR4ABI()) { + return; + } + + // Get callee saved register information. + MachineFrameInfo *FFI = MF.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo(); + + // Early exit if no callee saved registers are modified! 
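The layout code that follows walks downward from the back chain word: the FPR save area first, then the GPR area, then (eventually) the CR, VRSAVE and 16-byte-aligned Altivec areas. A simplified, standalone version of the same arithmetic, with register numbers as plain integers and the FP slot, CR and VRSAVE areas left out:

#include <cstdio>

int main() {
  long LowerBound = 0;                     // offset of the back chain word
  int MinFPR = 27, MinGPR = 25;            // lowest callee-saved regs spilled

  LowerBound -= (31 - MinFPR + 1) * 8;     // f27-f31 -> 40-byte FPR area
  long FPRArea = LowerBound;

  LowerBound -= (31 - MinGPR + 1) * 4;     // r25-r31 -> 28-byte GPR area
  long GPRArea = LowerBound;

  LowerBound = (LowerBound - 15) & ~15L;   // align the Altivec save area
  std::printf("FPR area %ld, GPR area %ld, VR area starts at %ld\n",
              FPRArea, GPRArea, LowerBound);
  return 0;
}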
+ if (CSI.empty() && !needsFP(MF)) { + return; + } + + unsigned MinGPR = PPC::R31; + unsigned MinFPR = PPC::F31; + unsigned MinVR = PPC::V31; + + bool HasGPSaveArea = false; + bool HasFPSaveArea = false; + bool HasCRSaveArea = false; + bool HasVRSAVESaveArea = false; + bool HasVRSaveArea = false; + + SmallVector<CalleeSavedInfo, 18> GPRegs; + SmallVector<CalleeSavedInfo, 18> FPRegs; + SmallVector<CalleeSavedInfo, 18> VRegs; + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = CSI[i].getRegClass(); + + if (RC == PPC::GPRCRegisterClass) { + HasGPSaveArea = true; + + GPRegs.push_back(CSI[i]); + + if (Reg < MinGPR) { + MinGPR = Reg; + } + } else if (RC == PPC::F8RCRegisterClass) { + HasFPSaveArea = true; + + FPRegs.push_back(CSI[i]); + + if (Reg < MinFPR) { + MinFPR = Reg; + } +// FIXME SVR4: Disable CR save area for now. + } else if ( RC == PPC::CRBITRCRegisterClass + || RC == PPC::CRRCRegisterClass) { +// HasCRSaveArea = true; + } else if (RC == PPC::VRSAVERCRegisterClass) { + HasVRSAVESaveArea = true; + } else if (RC == PPC::VRRCRegisterClass) { + HasVRSaveArea = true; + + VRegs.push_back(CSI[i]); + + if (Reg < MinVR) { + MinVR = Reg; + } + } else { + assert(0 && "Unknown RegisterClass!"); + } + } + + PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); + + int64_t LowerBound = 0; + + // Take into account stack space reserved for tail calls. + int TCSPDelta = 0; + if (PerformTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { + LowerBound = TCSPDelta; + } + + // The Floating-point register save area is right below the back chain word + // of the previous stack frame. + if (HasFPSaveArea) { + for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { + int FI = FPRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + LowerBound -= (31 - getRegisterNumbering(MinFPR) + 1) * 8; + } + + // Check whether the frame pointer register is allocated. If so, make sure it + // is spilled to the correct offset. + if (needsFP(MF)) { + HasGPSaveArea = true; + + int FI = PFI->getFramePointerSaveIndex(); + assert(FI && "No Frame Pointer Save Slot!"); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + // General register save area starts right below the Floating-point + // register save area. + if (HasGPSaveArea) { + // Move general register save area spill slots down, taking into account + // the size of the Floating-point register save area. + for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { + int FI = GPRegs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + LowerBound -= (31 - getRegisterNumbering(MinGPR) + 1) * 4; + } + + // The CR save area is below the general register save area. + if (HasCRSaveArea) { + // FIXME SVR4: Is it actually possible to have multiple elements in CSI + // which have the CR/CRBIT register class? + // Adjust the frame index of the CR spill slot. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + const TargetRegisterClass *RC = CSI[i].getRegClass(); + + if (RC == PPC::CRBITRCRegisterClass || RC == PPC::CRRCRegisterClass) { + int FI = CSI[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + } + + LowerBound -= 4; // The CR save area is always 4 bytes long. + } + + if (HasVRSAVESaveArea) { + // FIXME SVR4: Is it actually possible to have multiple elements in CSI + // which have the VRSAVE register class? 
+    // Adjust the frame index of the VRSAVE spill slot.
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      const TargetRegisterClass *RC = CSI[i].getRegClass();
+
+      if (RC == PPC::VRSAVERCRegisterClass) {
+        int FI = CSI[i].getFrameIdx();
+
+        FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+      }
+    }
+
+    LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
+  }
+
+  if (HasVRSaveArea) {
+    // Insert alignment padding, we need 16-byte alignment.
+    LowerBound = (LowerBound - 15) & ~(15);
+
+    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
+      int FI = VRegs[i].getFrameIdx();
+
+      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+    }
+  }
+}
+
+void
 PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -1033,15 +1197,26 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
   // Get processor type.
   bool IsPPC64 = Subtarget.isPPC64();
   // Get operating system
-  bool IsMachoABI = Subtarget.isMachoABI();
+  bool isDarwinABI = Subtarget.isDarwinABI();
   // Check if the link register (LR) must be saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
   // Do we have a frame pointer for this function?
   bool HasFP = hasFP(MF) && FrameSize;
-  int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
-  int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+  int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
+
+  int FPOffset = 0;
+  if (HasFP) {
+    if (Subtarget.isSVR4ABI()) {
+      MachineFrameInfo *FFI = MF.getFrameInfo();
+      int FPIndex = FI->getFramePointerSaveIndex();
+      assert(FPIndex && "No Frame Pointer Save Slot!");
+      FPOffset = FFI->getObjectOffset(FPIndex);
+    } else {
+      FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI);
+    }
+  }
 
   if (IsPPC64) {
     if (MustSaveLR)
@@ -1242,15 +1417,26 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
   // Get processor type.
   bool IsPPC64 = Subtarget.isPPC64();
   // Get operating system
-  bool IsMachoABI = Subtarget.isMachoABI();
+  bool isDarwinABI = Subtarget.isDarwinABI();
   // Check if the link register (LR) has been saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
   // Do we have a frame pointer for this function?
   bool HasFP = hasFP(MF) && FrameSize;
-  int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
-  int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+  int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
+
+  int FPOffset = 0;
+  if (HasFP) {
+    if (Subtarget.isSVR4ABI()) {
+      MachineFrameInfo *FFI = MF.getFrameInfo();
+      int FPIndex = FI->getFramePointerSaveIndex();
+      assert(FPIndex && "No Frame Pointer Save Slot!");
+      FPOffset = FFI->getObjectOffset(FPIndex);
+    } else {
+      FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI);
+    }
+  }
 
   bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
                    RetOpcode == PPC::TCRETURNdi ||
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 9506b65..ddaefdd 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -75,6 +75,8 @@ public:
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                             RegScavenger *RS = NULL) const;
 
+  void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
   void emitPrologue(MachineFunction &MF) const;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
 
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 9e15a55..bac8e3a 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -43,8 +43,9 @@ class VR<bits<5> num, string n> : PPCReg<n> {
 }
 
 // CR - One of the 8 4-bit condition registers
-class CR<bits<3> num, string n> : PPCReg<n> {
+class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
   field bits<3> Num = num;
+  let SubRegs = subregs;
 }
 
 // CRBIT - One of the 32 1-bit condition register fields
@@ -189,16 +190,6 @@ def V29 : VR<29, "v29">, DwarfRegNum<[106]>;
 def V30 : VR<30, "v30">, DwarfRegNum<[107]>;
 def V31 : VR<31, "v31">, DwarfRegNum<[108]>;
 
-// Condition registers
-def CR0 : CR<0, "cr0">, DwarfRegNum<[68]>;
-def CR1 : CR<1, "cr1">, DwarfRegNum<[69]>;
-def CR2 : CR<2, "cr2">, DwarfRegNum<[70]>;
-def CR3 : CR<3, "cr3">, DwarfRegNum<[71]>;
-def CR4 : CR<4, "cr4">, DwarfRegNum<[72]>;
-def CR5 : CR<5, "cr5">, DwarfRegNum<[73]>;
-def CR6 : CR<6, "cr6">, DwarfRegNum<[74]>;
-def CR7 : CR<7, "cr7">, DwarfRegNum<[75]>;
-
 // Condition register bits
 def CR0LT : CRBIT< 0, "0">, DwarfRegNum<[0]>;
 def CR0GT : CRBIT< 1, "1">, DwarfRegNum<[0]>;
@@ -233,6 +224,16 @@ def CR7GT : CRBIT<29, "29">, DwarfRegNum<[0]>;
 def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>;
 def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>;
 
+// Condition registers
+def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68]>;
+def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69]>;
+def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70]>;
+def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71]>;
+def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72]>;
+def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73]>;
+def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74]>;
+def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75]>;
+
 def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
                    [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>;
 def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
@@ -290,7 +291,12 @@ def GPRC : RegisterClass<"PPC", [i32], 32,
     // On PPC64, r13 is the thread pointer. Never allocate this register.
     // Note that this is overconservative, as it also prevents allocation of
     // R31 when the FP is not needed.
-    if (MF.getTarget().getSubtarget<PPCSubtarget>().isPPC64())
+    // When using the SVR4 ABI, r13 is reserved for the Small Data Area
+    // pointer.
+    const PPCSubtarget &Subtarget
+      = MF.getTarget().getSubtarget<PPCSubtarget>();
+
+    if (Subtarget.isPPC64() || Subtarget.isSVR4ABI())
       return end()-5;  // don't allocate R13, R31, R0, R1, LR
 
     if (needsFP(MF))
@@ -324,22 +330,24 @@ def G8RC : RegisterClass<"PPC", [i64], 64,
   }];
 }
 
-
-
+// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
+// ABI the size of the Floating-point register save area is determined by the
+// allocated non-volatile register with the lowest register number, as FP
+// register N is spilled to offset 8 * (32 - N) below the back chain word of the
+// previous stack frame. By allocating non-volatiles in reverse order we make
+// sure that the Floating-point register save area is always as small as
+// possible because there aren't any unused spill slots.
 def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7,
-  F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21,
-  F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+  F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23,
+  F22, F21, F20, F19, F18, F17, F16, F15, F14]>;
 def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7,
-  F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21,
-  F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+  F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23,
+  F22, F21, F20, F19, F18, F17, F16, F15, F14]>;
 
 def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
                          [V2, V3, V4, V5, V0, V1,
-  V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V20, V21,
-  V22, V23, V24, V25, V26, V27, V28, V29, V30, V31]>;
-
-def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2,
-                                            CR3, CR4]>;
+  V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+  V29, V28, V27, V26, V25, V24, V23, V22, V21, V20]>;
 
 def CRBITRC : RegisterClass<"PPC", [i32], 32,
                             [CR0LT, CR0GT, CR0EQ, CR0UN,
@@ -355,6 +363,13 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32,
   let CopyCost = -1;
 }
 
+def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2,
+                                            CR3, CR4]>
+{
+  let SubRegClassList = [CRBITRC, CRBITRC, CRBITRC, CRBITRC];
+}
 def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
 def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>;
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>;
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 176f3e1..f633cc6 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -148,8 +148,8 @@ public:
   /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
   unsigned getDarwinVers() const { return DarwinVers; }
 
-  bool isMachoABI() const { return isDarwin() || IsPPC64; }
-  bool isELF32_ABI() const { return !isDarwin() && !IsPPC64; }
+  bool isDarwinABI() const { return isDarwin() || IsPPC64; }
+  bool isSVR4ABI() const { return !isDarwin() && !IsPPC64; }
   unsigned getAsmFlavor() const {
     return AsmFlavor != Unset ?
       unsigned(AsmFlavor) : 0;
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 2f95d7e..e9073d6 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -152,7 +152,7 @@ bool PPCTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                           raw_ostream &Out) {
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
-    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+    PM.add(AsmPrinterCtor(Out, *this, Verbose));
 
   return false;
 }
@@ -183,7 +183,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -215,7 +215,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -230,7 +230,7 @@ bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -245,7 +245,7 @@ bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 086d2f4..c693bf4 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -46,7 +46,6 @@ protected:
   // set this functions to ctor pointer at startup time if they are linked in.
   typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
                                             PPCTargetMachine &tm,
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose);
   static AsmPrinterCtorFn AsmPrinterCtor;
 
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 688fb30..6e9e6c7 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -180,6 +180,16 @@ void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
 
 ===-------------------------------------------------------------------------===
 
+Darwin Stub removal:
+
+We still generate calls to foo$stub, and stubs, on Darwin. This is not
+necessary when building with the Leopard (10.5) or later linker, as stubs are
+generated by ld when necessary. Parameterizing this based on the deployment
+target (-mmacosx-version-min) is probably enough. x86-32 does this right, see
+its logic.
+
+===-------------------------------------------------------------------------===
+
 Darwin Stub LICM optimization:
 
 Loops like this:
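
For readers following the SVR4 frame-layout changes above, here is a minimal standalone sketch of the save-area arithmetic. It is not part of the commit: the program below is plain C++, the SavedFPRs/SavedGPRs sets and their register numbers are invented for illustration, and only the offset math mirrors processFunctionBeforeFrameFinalized and the F8RC allocation-order comment (FP register N spilled 8 * (32 - N) bytes below the back chain word, GPRs in 4-byte slots below that, vector registers 16-byte aligned).

// Standalone sketch (not from the patch): mirrors the SVR4 save-area math.
// Register numbers are made up; only the arithmetic matches the commit.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Pretend the allocator spilled these non-volatile registers.
  std::vector<unsigned> SavedFPRs = {31, 30, 29, 27};  // F27, F29..F31
  std::vector<unsigned> SavedGPRs = {31, 30, 14};      // R14, R30, R31

  long long LowerBound = 0;  // offset from the back chain word, grows downward

  // FPR save area: FN sits 8 * (32 - N) bytes below the back chain word, so
  // the area size is fixed by the lowest-numbered saved FPR.
  unsigned MinFPR = *std::min_element(SavedFPRs.begin(), SavedFPRs.end());
  LowerBound -= (31 - MinFPR + 1) * 8;
  std::printf("FPR save area: %lld bytes (F%u..F31)\n", -LowerBound, MinFPR);

  // GPR save area sits directly below it; 32-bit SVR4 uses 4-byte slots.
  unsigned MinGPR = *std::min_element(SavedGPRs.begin(), SavedGPRs.end());
  LowerBound -= (31 - MinGPR + 1) * 4;
  std::printf("GPR save area ends at offset %lld\n", LowerBound);

  // Vector registers need 16-byte alignment, hence the rounding step used in
  // the HasVRSaveArea block above.
  LowerBound = (LowerBound - 15) & ~15LL;
  std::printf("VR save area starts at offset %lld\n", LowerBound);
  return 0;
}

With the sample sets above this prints a 40-byte FPR area (F27..F31), a GPR area ending at -112, and a VR area starting at -128, which is why allocating non-volatiles in reverse order keeps the save areas free of unused slots.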