diff options
Diffstat (limited to 'lib/Target/X86/X86FrameLowering.cpp')
-rw-r--r-- | lib/Target/X86/X86FrameLowering.cpp | 625 |
1 files changed, 431 insertions, 194 deletions
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index ed45a9a..d54f4ae 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -15,6 +15,7 @@ #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" +#include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -91,12 +92,12 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, return 0; static const unsigned CallerSavedRegs32Bit[] = { - X86::EAX, X86::EDX, X86::ECX + X86::EAX, X86::EDX, X86::ECX, 0 }; static const unsigned CallerSavedRegs64Bit[] = { X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, - X86::R8, X86::R9, X86::R10, X86::R11 + X86::R8, X86::R9, X86::R10, X86::R11, 0 }; unsigned Opc = MBBI->getOpcode(); @@ -283,8 +284,8 @@ static bool isEAXLiveIn(MachineFunction &MF) { } void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, - MCSymbol *Label, - unsigned FramePtr) const { + MCSymbol *Label, + unsigned FramePtr) const { MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); @@ -346,6 +347,247 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, } } +/// getCompactUnwindRegNum - Get the compact unwind number for a given +/// register. The number corresponds to the enum lists in +/// compact_unwind_encoding.h. +static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) { + int Idx = 1; + for (; *CURegs; ++CURegs, ++Idx) + if (*CURegs == Reg) + return Idx; + + return -1; +} + +/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding +/// used with frameless stacks. It is passed the number of registers to be saved +/// and an array of the registers saved. +static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6], + unsigned RegCount, + bool Is64Bit) { + // The saved registers are numbered from 1 to 6. In order to encode the order + // in which they were saved, we re-number them according to their place in the + // register order. The re-numbering is relative to the last re-numbered + // register. E.g., if we have registers {6, 2, 4, 5} saved in that order: + // + // Orig Re-Num + // ---- ------ + // 6 6 + // 2 2 + // 4 3 + // 5 3 + // + static const unsigned CU32BitRegs[] = { + X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 + }; + static const unsigned CU64BitRegs[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); + + uint32_t RenumRegs[6]; + for (unsigned i = 6 - RegCount; i < 6; ++i) { + int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]); + if (CUReg == -1) return ~0U; + SavedRegs[i] = CUReg; + + unsigned Countless = 0; + for (unsigned j = 6 - RegCount; j < i; ++j) + if (SavedRegs[j] < SavedRegs[i]) + ++Countless; + + RenumRegs[i] = SavedRegs[i] - Countless - 1; + } + + // Take the renumbered values and encode them into a 10-bit number. + uint32_t permutationEncoding = 0; + switch (RegCount) { + case 6: + permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] + + 6 * RenumRegs[2] + 2 * RenumRegs[3] + + RenumRegs[4]; + break; + case 5: + permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] + + 6 * RenumRegs[3] + 2 * RenumRegs[4] + + RenumRegs[5]; + break; + case 4: + permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] + + 3 * RenumRegs[4] + RenumRegs[5]; + break; + case 3: + permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] + + RenumRegs[5]; + break; + case 2: + permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; + break; + case 1: + permutationEncoding |= RenumRegs[5]; + break; + } + + assert((permutationEncoding & 0x3FF) == permutationEncoding && + "Invalid compact register encoding!"); + return permutationEncoding; +} + +/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a +/// compact encoding with a frame pointer. +static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6], + bool Is64Bit) { + static const unsigned CU32BitRegs[] = { + X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 + }; + static const unsigned CU64BitRegs[] = { + X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 + }; + const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs); + + // Encode the registers in the order they were saved, 3-bits per register. The + // registers are numbered from 1 to 6. + uint32_t RegEnc = 0; + for (int I = 5; I >= 0; --I) { + unsigned Reg = SavedRegs[I]; + if (Reg == 0) break; + int CURegNum = getCompactUnwindRegNum(CURegs, Reg); + if (CURegNum == -1) + return ~0U; + RegEnc |= (CURegNum & 0x7) << (5 - I); + } + + assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!"); + return RegEnc; +} + +uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { + const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned StackPtr = RegInfo->getStackRegister(); + + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + + bool Is64Bit = STI.is64Bit(); + bool HasFP = hasFP(MF); + + unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 }; + int SavedRegIdx = 6; + + unsigned OffsetSize = (Is64Bit ? 8 : 4); + + unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r); + unsigned PushInstrSize = 1; + unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr); + unsigned MoveInstrSize = (Is64Bit ? 3 : 2); + unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta); + unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2); + + unsigned StackDivide = (Is64Bit ? 8 : 4); + + unsigned InstrOffset = 0; + unsigned CFAOffset = 0; + unsigned StackAdjust = 0; + + MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB. + bool ExpectEnd = false; + for (MachineBasicBlock::iterator + MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opc = MI.getOpcode(); + if (Opc == X86::PROLOG_LABEL) continue; + if (!MI.getFlag(MachineInstr::FrameSetup)) break; + + // We don't exect any more prolog instructions. + if (ExpectEnd) return 0; + + if (Opc == PushInstr) { + // If there are too many saved registers, we cannot use compact encoding. + if (--SavedRegIdx < 0) return 0; + + SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg(); + CFAOffset += OffsetSize; + InstrOffset += PushInstrSize; + } else if (Opc == MoveInstr) { + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned DstReg = MI.getOperand(0).getReg(); + + if (DstReg != FramePtr || SrcReg != StackPtr) + return 0; + + CFAOffset = 0; + memset(SavedRegs, 0, sizeof(SavedRegs)); + InstrOffset += MoveInstrSize; + } else if (Opc == SubtractInstr) { + if (StackAdjust) + // We all ready have a stack pointer adjustment. + return 0; + + if (!MI.getOperand(0).isReg() || + MI.getOperand(0).getReg() != MI.getOperand(1).getReg() || + MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm()) + // We need this to be a stack adjustment pointer. Something like: + // + // %RSP<def> = SUB64ri8 %RSP, 48 + return 0; + + StackAdjust = MI.getOperand(2).getImm() / StackDivide; + SubtractInstrIdx += InstrOffset; + ExpectEnd = true; + } + } + + // Encode that we are using EBP/RBP as the frame pointer. + uint32_t CompactUnwindEncoding = 0; + CFAOffset /= StackDivide; + if (HasFP) { + if ((CFAOffset & 0xFF) != CFAOffset) + // Offset was too big for compact encoding. + return 0; + + // Get the encoding of the saved registers when we have a frame pointer. + uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit); + if (RegEnc == ~0U) + return 0; + + CompactUnwindEncoding |= 0x01000000; + CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16; + CompactUnwindEncoding |= RegEnc & 0x7FFF; + } else { + unsigned FullOffset = CFAOffset + StackAdjust; + if ((FullOffset & 0xFF) == FullOffset) { + // Frameless stack. + CompactUnwindEncoding |= 0x02000000; + CompactUnwindEncoding |= (FullOffset & 0xFF) << 16; + } else { + if ((CFAOffset & 0x7) != CFAOffset) + // The extra stack adjustments are too big for us to handle. + return 0; + + // Frameless stack with an offset too large for us to encode compactly. + CompactUnwindEncoding |= 0x03000000; + + // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' + // instruction. + CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; + + // Encode any extra stack stack changes (done via push instructions). + CompactUnwindEncoding |= (CFAOffset & 0x7) << 13; + } + + // Get the encoding of the saved registers when we don't have a frame + // pointer. + uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs, + 6 - SavedRegIdx, + Is64Bit); + if (RegEnc == ~0U) return 0; + CompactUnwindEncoding |= RegEnc & 0x3FF; + } + + return CompactUnwindEncoding; +} + /// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to @@ -370,7 +612,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned SlotSize = RegInfo->getSlotSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned StackPtr = RegInfo->getStackRegister(); - DebugLoc DL; // If we're forcing a stack realignment we can't rely on just the frame @@ -398,7 +639,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. - !IsWin64) { // Win64 has no Red Zone + !IsWin64 && // Win64 has no Red Zone + !EnableSegmentedStacks) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); @@ -459,7 +701,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark the place where EBP/RBP was saved. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) + .addSym(FrameLabel); // Define the current CFA rule to use the provided offset. if (StackSize) { @@ -478,7 +721,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc)); } - // Update EBP with the new base value... + // Update EBP with the new base value. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) @@ -487,7 +730,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (needsFrameMoves) { // Mark effective beginning of when frame pointer becomes valid. MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) + .addSym(FrameLabel); // Define the current CFA to use the EBP/RBP register. MachineLocation FPDst(FramePtr); @@ -504,8 +748,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (RegInfo->needsStackRealignment(MF)) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), - StackPtr).addReg(StackPtr).addImm(-MaxAlign); + TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr) + .addReg(StackPtr) + .addImm(-MaxAlign) + .setMIFlag(MachineInstr::FrameSetup); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); @@ -522,6 +768,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; + MBBI->setFlag(MachineInstr::FrameSetup); ++MBBI; if (!HasFP && needsFrameMoves) { @@ -530,8 +777,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); // Define the current CFA rule to use the provided offset. - unsigned Ptr = StackSize ? - MachineLocation::VirtualFP : StackPtr; + unsigned Ptr = StackSize ? MachineLocation::VirtualFP : StackPtr; MachineLocation SPDst(Ptr); MachineLocation SPSrc(Ptr, StackOffset); Moves.push_back(MachineMove(Label, SPDst, SPSrc)); @@ -586,26 +832,30 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill); + .addReg(X86::EAX, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); } if (Is64Bit) { // Handle the 64-bit Windows ABI case where we need to call __chkstk. // Function prologue is responsible for adjusting the stack pointer. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) - .addImm(NumBytes); + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); } else { // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. // We'll also use 4 already allocated bytes for EAX. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(isEAXAlive ? NumBytes - 4 : NumBytes); + .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) + .setMIFlag(MachineInstr::FrameSetup); } BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32)) .addExternalSymbol(StackProbeSymbol) .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit) + .setMIFlag(MachineInstr::FrameSetup); // MSVC x64's __chkstk needs to adjust %rsp. // FIXME: %rax preserves the offset and should be available. @@ -618,6 +868,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), StackPtr, false, NumBytes - 4); + MI->setFlag(MachineInstr::FrameSetup); MBB.insert(MBBI, MI); } } else if (NumBytes) @@ -627,7 +878,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); + BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) + .addSym(Label); if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. @@ -647,6 +899,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { if (PushedRegs) emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); } + + // Darwin 10.7 and greater has support for compact unwind encoding. + if (STI.getTargetTriple().isMacOSX() && + !STI.getTargetTriple().isMacOSXVersionLT(10, 7)) + MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF)); } void X86FrameLowering::emitEpilogue(MachineFunction &MF, @@ -844,23 +1101,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } -void -X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const { - // Calculate amount of bytes used for return address storing - int stackGrowth = (STI.is64Bit() ? -8 : -4); - const X86RegisterInfo *RI = TM.getRegisterInfo(); - - // Initial state of the frame pointer is esp+stackGrowth. - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(RI->getStackRegister(), stackGrowth); - Moves.push_back(MachineMove(0, Dst, Src)); - - // Add return address to move list - MachineLocation CSDst(RI->getStackRegister(), stackGrowth); - MachineLocation CSSrc(RI->getRARegister()); - Moves.push_back(MachineMove(0, CSDst, CSSrc)); -} - int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { const X86RegisterInfo *RI = static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo()); @@ -873,9 +1113,7 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con // Skip the saved EBP. Offset += RI->getSlotSize(); } else { - unsigned Align = MFI->getObjectAlignment(FI); - assert((-(Offset + StackSize)) % Align == 0); - Align = 0; + assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0); return Offset + StackSize; } // FIXME: Support tail calls @@ -1027,184 +1265,183 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, true); assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); - FrameIdx = 0; + (void)FrameIdx; } } -/// permuteEncode - Create the permutation encoding used with frameless -/// stacks. It is passed the number of registers to be saved and an array of the -/// registers saved. -static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) { - // The saved registers are numbered from 1 to 6. In order to encode the order - // in which they were saved, we re-number them according to their place in the - // register order. The re-numbering is relative to the last re-numbered - // register. E.g., if we have registers {6, 2, 4, 5} saved in that order: - // - // Orig Re-Num - // ---- ------ - // 6 6 - // 2 2 - // 4 3 - // 5 3 - // - bool Used[7] = { false, false, false, false, false, false, false }; - uint32_t RenumRegs[6]; - for (unsigned I = 0; I < SavedCount; ++I) { - uint32_t Renum = 0; - for (unsigned U = 1; U < 7; ++U) { - if (U == Registers[I]) - break; - if (!Used[U]) - ++Renum; - } - - Used[Registers[I]] = true; - RenumRegs[I] = Renum; +static bool +HasNestArgument(const MachineFunction *MF) { + const Function *F = MF->getFunction(); + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; I++) { + if (I->hasNestAttr()) + return true; } + return false; +} - // Take the renumbered values and encode them into a 10-bit number. - uint32_t permutationEncoding = 0; - switch (SavedCount) { - case 6: - permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] - + 6 * RenumRegs[2] + 2 * RenumRegs[3] - + RenumRegs[4]; - break; - case 5: - permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] - + 6 * RenumRegs[2] + 2 * RenumRegs[3] - + RenumRegs[4]; - break; - case 4: - permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1] - + 3 * RenumRegs[2] + RenumRegs[3]; - break; - case 3: - permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1] - + RenumRegs[2]; - break; - case 2: - permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1]; - break; - case 1: - permutationEncoding |= RenumRegs[0]; - break; +static unsigned +GetScratchRegister(bool Is64Bit, const MachineFunction &MF) { + if (Is64Bit) { + return X86::R11; + } else { + CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv(); + bool IsNested = HasNestArgument(&MF); + + if (CallingConvention == CallingConv::X86_FastCall) { + if (IsNested) { + report_fatal_error("Segmented stacks does not support fastcall with " + "nested function."); + return -1; + } else { + return X86::EAX; + } + } else { + if (IsNested) + return X86::EDX; + else + return X86::ECX; + } } - - return permutationEncoding; } -uint32_t X86FrameLowering:: -getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs, - int DataAlignmentFactor, bool IsEH) const { - uint32_t Encoding = 0; - int CFAOffset = 0; - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 }; - unsigned SavedRegIdx = 0; - int FramePointerReg = -1; +void +X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { + MachineBasicBlock &prologueMBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86InstrInfo &TII = *TM.getInstrInfo(); + uint64_t StackSize; + bool Is64Bit = STI.is64Bit(); + unsigned TlsReg, TlsOffset; + DebugLoc DL; + const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>(); - for (ArrayRef<MCCFIInstruction>::const_iterator - I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { - const MCCFIInstruction &Inst = *I; - MCSymbol *Label = Inst.getLabel(); + unsigned ScratchReg = GetScratchRegister(Is64Bit, MF); + assert(!MF.getRegInfo().isLiveIn(ScratchReg) && + "Scratch register is live-in"); - // Ignore invalid labels. - if (Label && !Label->isDefined()) continue; + if (MF.getFunction()->isVarArg()) + report_fatal_error("Segmented stacks do not support vararg functions."); + if (!ST->isTargetLinux()) + report_fatal_error("Segmented stacks supported only on linux."); - unsigned Operation = Inst.getOperation(); - if (Operation != MCCFIInstruction::Move && - Operation != MCCFIInstruction::RelMove) - // FIXME: We can't handle this frame just yet. - return 0; - - const MachineLocation &Dst = Inst.getDestination(); - const MachineLocation &Src = Inst.getSource(); - const bool IsRelative = (Operation == MCCFIInstruction::RelMove); - - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (Src.getReg() != MachineLocation::VirtualFP) { - // DW_CFA_def_cfa - assert(FramePointerReg == -1 &&"Defining more than one frame pointer?"); - if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP && - TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP) - // The frame pointer isn't EBP/RBP. Cannot make unwind information - // compact. - return 0; - FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH); - } // else DW_CFA_def_cfa_offset - - if (IsRelative) - CFAOffset += Src.getOffset(); - else - CFAOffset -= Src.getOffset(); + MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); + MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + bool IsNested = false; - continue; - } + // We need to know if the function has a nest argument only in 64 bit mode. + if (Is64Bit) + IsNested = HasNestArgument(&MF); - if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - // DW_CFA_def_cfa_register - assert(FramePointerReg == -1 && "Defining more than one frame pointer?"); + // The MOV R10, RAX needs to be in a different block, since the RET we emit in + // allocMBB needs to be last (terminating) instruction. + MachineBasicBlock *restoreR10MBB = NULL; + if (IsNested) + restoreR10MBB = MF.CreateMachineBasicBlock(); - if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP && - TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP) - // The frame pointer isn't EBP/RBP. Cannot make unwind information - // compact. - return 0; + for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), + e = prologueMBB.livein_end(); i != e; i++) { + allocMBB->addLiveIn(*i); + checkMBB->addLiveIn(*i); - FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH); - if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg)) - return 0; + if (IsNested) + restoreR10MBB->addLiveIn(*i); + } - SavedRegs[0] = 0; - SavedRegIdx = 0; - continue; - } + if (IsNested) { + allocMBB->addLiveIn(X86::R10); + restoreR10MBB->addLiveIn(X86::RAX); + } - unsigned Reg = Src.getReg(); - int Offset = Dst.getOffset(); - if (IsRelative) - Offset -= CFAOffset; - Offset /= DataAlignmentFactor; + if (IsNested) + MF.push_front(restoreR10MBB); + MF.push_front(allocMBB); + MF.push_front(checkMBB); + + // Eventually StackSize will be calculated by a link-time pass; which will + // also decide whether checking code needs to be injected into this particular + // prologue. + StackSize = MFI->getStackSize(); + + // Read the limit off the current stacklet off the stack_guard location. + if (Is64Bit) { + TlsReg = X86::FS; + TlsOffset = 0x70; + + BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP) + .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg) + .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); + } else { + TlsReg = X86::GS; + TlsOffset = 0x30; - if (Offset < 0) { - // FIXME: Handle? - // DW_CFA_offset_extended_sf - return 0; - } else if (Reg < 64) { - // DW_CFA_offset + Reg - if (SavedRegIdx >= 6) return 0; - int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH); - if (CURegNum == -1) return 0; - SavedRegs[SavedRegIdx++] = CURegNum; - } else { - // FIXME: Handle? - // DW_CFA_offset_extended - return 0; - } + BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) + .addImm(0).addReg(0).addImm(-StackSize).addReg(0); + BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) + .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); } - // Bail if there are too many registers to encode. - if (SavedRegIdx > 6) return 0; + // This jump is taken if SP >= (Stacklet Limit + Stack Space required). + // It jumps to normal execution of the function body. + BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB); + + // On 32 bit we first push the arguments size and then the frame size. On 64 + // bit, we pass the stack frame size in r10 and the argument size in r11. + if (Is64Bit) { + // Functions with nested arguments use R10, so it needs to be saved across + // the call to _morestack + + if (IsNested) + BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10); + + BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10) + .addImm(StackSize); + BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11) + .addImm(X86FI->getArgumentStackSize()); + MF.getRegInfo().setPhysRegUsed(X86::R10); + MF.getRegInfo().setPhysRegUsed(X86::R11); + } else { + // Since we'll call __morestack, stack alignment needs to be preserved. + BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP) + .addImm(8); + BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) + .addImm(X86FI->getArgumentStackSize()); + BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) + .addImm(StackSize); + } - // Check if the offset is too big. - CFAOffset /= 4; - if ((CFAOffset & 0xFF) != CFAOffset) - return 0; - Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding. - - if (FramePointerReg != -1) { - Encoding |= 0x01000000; // EBP/RBP Unwind Frame - for (unsigned I = 0; I != SavedRegIdx; ++I) { - unsigned Reg = SavedRegs[I]; - if (Reg == unsigned(FramePointerReg)) continue; - Encoding |= (Reg & 0x7) << (I * 3); // Register encoding - } + // __morestack is in libgcc + if (Is64Bit) + BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32)) + .addExternalSymbol("__morestack"); + else + BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("__morestack"); + + // __morestack only seems to remove 8 bytes off the stack. Add back the + // additional 8 bytes we added before pushing the arguments. + if (!Is64Bit) + BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP) + .addImm(8); + BuildMI(allocMBB, DL, TII.get(X86::RET)); + + if (IsNested) + BuildMI(restoreR10MBB, DL, TII.get(X86::MOV64rr), X86::R10) + .addReg(X86::RAX); + + if (IsNested) { + allocMBB->addSuccessor(restoreR10MBB); + restoreR10MBB->addSuccessor(&prologueMBB); } else { - Encoding |= 0x02000000; // Frameless unwind with small stack - Encoding |= (SavedRegIdx & 0x7) << 10; - Encoding |= permuteEncode(SavedRegIdx, SavedRegs); + allocMBB->addSuccessor(&prologueMBB); } - return Encoding; + checkMBB->addSuccessor(allocMBB); + checkMBB->addSuccessor(&prologueMBB); + +#ifdef XDEBUG + MF.verify(); +#endif } |