diff options
Diffstat (limited to 'contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp | 415 |
1 files changed, 270 insertions, 145 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 3624377..45c2c30 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -47,7 +47,7 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); // iOS requires FP not to be clobbered for backtracing purpose. if (STI.isTargetIOS()) @@ -137,12 +137,27 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, } static int sizeOfSPAdjustment(const MachineInstr *MI) { - assert(MI->getOpcode() == ARM::VSTMDDB_UPD); + int RegSize; + switch (MI->getOpcode()) { + case ARM::VSTMDDB_UPD: + RegSize = 8; + break; + case ARM::STMDB_UPD: + case ARM::t2STMDB_UPD: + RegSize = 4; + break; + case ARM::t2STR_PRE: + case ARM::STR_PRE_IMM: + return 4; + default: + llvm_unreachable("Unknown push or pop like instruction"); + } + int count = 0; // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ // pred) so the list starts at 4. for (int i = MI->getNumOperands() - 1; i >= 4; --i) - count += 8; + count += RegSize; return count; } @@ -154,6 +169,110 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF, return StackSizeInBytes >= 4096; } +namespace { +struct StackAdjustingInsts { + struct InstInfo { + MachineBasicBlock::iterator I; + unsigned SPAdjust; + bool BeforeFPSet; + }; + + SmallVector<InstInfo, 4> Insts; + + void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust, + bool BeforeFPSet = false) { + InstInfo Info = {I, SPAdjust, BeforeFPSet}; + Insts.push_back(Info); + } + + void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { + auto Info = std::find_if(Insts.begin(), Insts.end(), + [&](InstInfo &Info) { return Info.I == I; }); + assert(Info != Insts.end() && "invalid sp adjusting instruction"); + Info->SPAdjust += ExtraBytes; + } + + void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, + DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) { + unsigned CFAOffset = 0; + for (auto &Info : Insts) { + if (HasFP && !Info.BeforeFPSet) + return; + + CFAOffset -= Info.SPAdjust; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, std::next(Info.I), dl, + TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + } +}; +} + +/// Emit an instruction sequence that will align the address in +/// register Reg by zero-ing out the lower bits. For versions of the +/// architecture that support Neon, this must be done in a single +/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a +/// single instruction. That function only gets called when optimizing +/// spilling of D registers on a core with the Neon instruction set +/// present. +static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, + const TargetInstrInfo &TII, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, const unsigned Reg, + const unsigned Alignment, + const bool MustBeSingleInstruction) { + const ARMSubtarget &AST = MF.getTarget().getSubtarget<ARMSubtarget>(); + const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops(); + const unsigned AlignMask = Alignment - 1; + const unsigned NrBitsToZero = countTrailingZeros(Alignment); + assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported"); + if (!AFI->isThumbFunction()) { + // if the BFC instruction is available, use that to zero the lower + // bits: + // bfc Reg, #0, log2(Alignment) + // otherwise use BIC, if the mask to zero the required number of bits + // can be encoded in the bic immediate field + // bic Reg, Reg, Alignment-1 + // otherwise, emit + // lsr Reg, Reg, log2(Alignment) + // lsl Reg, Reg, log2(Alignment) + if (CanUseBFC) { + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask)); + } else if (AlignMask <= 255) { + AddDefaultCC( + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg) + .addReg(Reg, RegState::Kill) + .addImm(AlignMask))); + } else { + assert(!MustBeSingleInstruction && + "Shouldn't call emitAligningInstructions demanding a single " + "instruction to be emitted for large stack alignment for a target " + "without BFC."); + AddDefaultCC(AddDefaultPred( + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)))); + AddDefaultCC(AddDefaultPred( + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)))); + } + } else { + // Since this is only reached for Thumb-2 targets, the BFC instruction + // should always be available. + assert(CanUseBFC); + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask)); + } +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -163,20 +282,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MCContext &Context = MMI.getContext(); const TargetMachine &TM = MF.getTarget(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo()); - const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + TM.getSubtargetImpl()->getRegisterInfo()); + const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = TM.getFrameLowering()->getStackAlignment(); + unsigned Align = + TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); - int CFAOffset = 0; // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. @@ -189,15 +308,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; + StackAdjustingInsts DefCFAOffsetCandidates; + // Allocate the vararg register save area. if (ArgRegsSaveSize) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); - CFAOffset -= ArgRegsSaveSize; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true); } if (!AFI->hasStackFrame() && @@ -205,11 +322,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes - ArgRegsSaveSize != 0) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); - CFAOffset -= NumBytes - ArgRegsSaveSize; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), + NumBytes - ArgRegsSaveSize, true); } return; } @@ -252,21 +366,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } // Move past area 1. - MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push, - DPRCSPush; - if (GPRCS1Size > 0) + MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; + if (GPRCS1Size > 0) { GPRCS1Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); + } // Determine starting offsets of spill areas. bool HasFP = hasFP(MF); - unsigned DPRCSOffset = NumBytes - (ArgRegsSaveSize + GPRCS1Size - + GPRCS2Size + DPRCSSize); - unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; - unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; + unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; + unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U; + unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign; + unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) - + GPRCS1Size + ArgRegsSaveSize; + FramePtrOffsetInPush = + MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -275,16 +391,32 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 2. - if (GPRCS2Size > 0) + if (GPRCS2Size > 0) { GPRCS2Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + } + + // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our + // .cfi_offset operations will reflect that. + if (DPRGapSize) { + assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs"); + if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize)) + DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); + else { + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, + MachineInstr::FrameSetup); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); + } + } // Move past area 3. if (DPRCSSize > 0) { - DPRCSPush = MBBI; // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. - while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) + while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { + DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI)); LastPush = MBBI++; + } } // Move past the aligned DPRCS2 area. @@ -343,18 +475,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { NumBytes = 0; } - unsigned adjustedGPRCS1Size = GPRCS1Size; if (NumBytes) { // Adjust SP after all the callee-save spills. - if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) { - if (LastPush == GPRCS1Push) { - FramePtrOffsetInPush += NumBytes; - adjustedGPRCS1Size += NumBytes; - NumBytes = 0; - } - } else + if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) + DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes); + else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes); + } if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 @@ -368,13 +497,42 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } - if (adjustedGPRCS1Size > 0) { - CFAOffset -= adjustedGPRCS1Size; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - MachineBasicBlock::iterator Pos = ++GPRCS1Push; - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + // Set FP to point to the stack slot that contains the previous FP. + // For iOS, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not iOS, all the callee-saved registers go + // into spill area 1, including the FP in R11. In either case, it + // is in area one and the adjustment needs to take place just after + // that push. + if (HasFP) { + MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); + unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push); + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, + dl, TII, FramePtr, ARM::SP, + PushSize + FramePtrOffsetInPush, + MachineInstr::FrameSetup); + if (FramePtrOffsetInPush + PushSize != 0) { + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, MRI->getDwarfRegNum(FramePtr, true), + -(ArgRegsSaveSize - FramePtrOffsetInPush))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } else { + unsigned CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(FramePtr, true))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + } + + // Now that the prologue's actual instructions are finalised, we can insert + // the necessary DWARF cf instructions to describe the situation. Start by + // recording where each register ended up: + if (GPRCS1Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); + int CFIIndex; for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -399,47 +557,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); break; } } } - // Set FP to point to the stack slot that contains the previous FP. - // For iOS, FP is R7, which has now been stored in spill area 1. - // Otherwise, if this is not iOS, all the callee-saved registers go - // into spill area 1, including the FP in R11. In either case, it - // is in area one and the adjustment needs to take place just after - // that push. - if (HasFP) { - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, GPRCS1Push, dl, TII, - FramePtr, ARM::SP, FramePtrOffsetInPush, - MachineInstr::FrameSetup); - if (FramePtrOffsetInPush) { - CFAOffset += FramePtrOffsetInPush; - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); - BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - } else { - unsigned CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( - nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } - if (GPRCS2Size > 0) { - MachineBasicBlock::iterator Pos = ++GPRCS2Push; - if (!HasFP) { - CFAOffset -= GPRCS2Size; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } + MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -455,7 +581,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } break; } @@ -465,17 +592,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (DPRCSSize > 0) { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. - do { - MachineBasicBlock::iterator Push = DPRCSPush++; - if (!HasFP) { - CFAOffset -= sizeOfSPAdjustment(Push); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD); - + MachineBasicBlock::iterator Pos = std::next(LastPush); for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -485,21 +602,18 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Offset = MFI->getObjectOffset(FI); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); - BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } } } - if (NumBytes) { - if (!HasFP) { - CFAOffset -= NumBytes; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } + // Now we can emit descriptions of where the canonical frame address was + // throughout the process. If we have a frame pointer, it takes over the job + // half-way through, so only the first few .cfi_def_cfa_offset instructions + // actually get emitted. + DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - @@ -507,6 +621,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedGapSize(DPRGapSize); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // If we need dynamic stack realignment, do it here. Be paranoid and make @@ -515,28 +630,24 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // realigned. if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); - assert (!AFI->isThumb1OnlyFunction()); + assert(!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { - // Emit bic sp, sp, MaxAlign - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::BICri), ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addImm(MaxAlign-1))); + emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign, + false); } else { - // We cannot use sp as source/dest register here, thus we're emitting the - // following sequence: + // We cannot use sp as source/dest register here, thus we're using r4 to + // perform the calculations. We're emitting the following sequence: // mov r4, sp - // bic r4, r4, MaxAlign + // -- use emitAligningInstructions to produce best sequence to zero + // -- out lower bits in r4 // mov sp, r4 // FIXME: It will be better just to find spare register here. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) - .addReg(ARM::SP, RegState::Kill)); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::t2BICri), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign-1))); + .addReg(ARM::SP, RegState::Kill)); + emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, + false); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) - .addReg(ARM::R4, RegState::Kill)); + .addReg(ARM::R4, RegState::Kill)); } AFI->setShouldRestoreSPFromFP(true); @@ -574,7 +685,7 @@ void ARMFrameLowering::fixTCReturn(MachineFunction &MF, unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); const ARMBaseInstrInfo &TII = - *MF.getTarget().getSubtarget<ARMSubtarget>().getInstrInfo(); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri)) return; @@ -622,14 +733,17 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -659,6 +773,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, NumBytes -= (ArgRegsSaveSize + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedGapSize() + AFI->getDPRCalleeSavedAreaSize()); // Reset SP based on frame pointer only if the stack frame extends beyond @@ -707,6 +822,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) MBBI++; } + if (AFI->getDPRCalleeSavedGapSize()) { + assert(AFI->getDPRCalleeSavedGapSize() == 4 && + "unexpected DPR alignment gap"); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize()); + } + if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } @@ -732,8 +853,8 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, int SPAdj) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); int FPOffset = Offset - AFI->getFramePtrSpillOffset(); @@ -818,7 +939,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned NumAlignedDPRCS2Regs, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -891,7 +1012,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); unsigned RetOpcode = MI->getOpcode(); @@ -981,7 +1102,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineFrameInfo &MFI = *MF.getFrameInfo(); // Mark the D-register spill slots as properly aligned. Since MFI computes @@ -1021,15 +1142,16 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // The immediate is <= 64, so it doesn't need any special encoding. unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::SP) - .addImm(8 * NumAlignedDPRCS2Regs))); + .addReg(ARM::SP) + .addImm(8 * NumAlignedDPRCS2Regs))); - // bic r4, r4, #align-1 - Opc = isThumb ? ARM::t2BICri : ARM::BICri; unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign - 1))); + // We must set parameter MustBeSingleInstruction to true, since + // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform + // stack alignment. Luckily, this can always be done since all ARM + // architecture versions that support Neon also support the BFC + // instruction. + emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true); // mov sp, r4 // The stack pointer must be adjusted before spilling anything, otherwise @@ -1140,7 +1262,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // Find the frame index assigned to d8. int D8SpillFI = 0; @@ -1355,12 +1477,15 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Don't bother if the default stack alignment is sufficiently high. - if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8) + if (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment() >= 8) return; // Aligned spills require stack realignment. - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); if (!RegInfo->canRealignStack(MF)) return; @@ -1399,10 +1524,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, unsigned NumGPRSpills = 0; SmallVector<unsigned, 4> UnspilledCS1GPRs; SmallVector<unsigned, 4> UnspilledCS2GPRs; - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1565,7 +1690,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // of GPRs, spill one extra callee save GPR so we won't have to pad between // the integer and double callee save areas. unsigned TargetAlign = getStackAlignment(); - if (TargetAlign == 8 && (NumGPRSpills & 1)) { + if (TargetAlign >= 8 && (NumGPRSpills & 1)) { if (CS1Spilled && !UnspilledCS1GPRs.empty()) { for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { unsigned Reg = UnspilledCS1GPRs[i]; @@ -1643,7 +1768,7 @@ void ARMFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount @@ -1761,7 +1886,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MCContext &Context = MMI.getContext(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL; |