summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff')
-rw-r--r--contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff1271
1 files changed, 0 insertions, 1271 deletions
diff --git a/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff b/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff
deleted file mode 100644
index 57e16d7..0000000
--- a/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff
+++ /dev/null
@@ -1,1271 +0,0 @@
-Pull in r227752 from upstream llvm trunk (by Michael Kuperstein):
-
- [X86] Convert esp-relative movs of function arguments to pushes, step 2
-
- This moves the transformation introduced in r223757 into a separate MI pass.
- This allows it to cover many more cases (not only cases where there must be a
- reserved call frame), and perform rudimentary call folding. It still doesn't
- have a heuristic, so it is enabled only for optsize/minsize, with stack
- alignment <= 8, where it ought to be a fairly clear win.
-
- (Re-commit of r227728)
-
- Differential Revision: http://reviews.llvm.org/D6789
-
-This helps to get sys/boot/i386/boot2 below the required size again,
-when optimizing with -Oz.
-
-Introduced here: http://svnweb.freebsd.org/changeset/base/278112
-
-Index: include/llvm/Target/TargetFrameLowering.h
-===================================================================
---- include/llvm/Target/TargetFrameLowering.h
-+++ include/llvm/Target/TargetFrameLowering.h
-@@ -193,6 +193,11 @@ class TargetFrameLowering {
- return hasReservedCallFrame(MF) || hasFP(MF);
- }
-
-+ // needsFrameIndexResolution - Do we need to perform FI resolution for
-+ // this function. Normally, this is required only when the function
-+ // has any stack objects. However, targets may want to override this.
-+ virtual bool needsFrameIndexResolution(const MachineFunction &MF) const;
-+
- /// getFrameIndexOffset - Returns the displacement from the frame register to
- /// the stack frame of the specified index.
- virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-Index: lib/CodeGen/PrologEpilogInserter.cpp
-===================================================================
---- lib/CodeGen/PrologEpilogInserter.cpp
-+++ lib/CodeGen/PrologEpilogInserter.cpp
-@@ -703,7 +703,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &
- /// register references and actual offsets.
- ///
- void PEI::replaceFrameIndices(MachineFunction &Fn) {
-- if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
-+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
-+ if (!TFI.needsFrameIndexResolution(Fn)) return;
-
- // Store SPAdj at exit of a basic block.
- SmallVector<int, 8> SPState;
-@@ -769,13 +770,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *B
- continue;
- }
-
-- // If we are looking at a call sequence, we need to keep track of
-- // the SP adjustment made by each instruction in the sequence.
-- // This includes both the frame setup/destroy pseudos (handled above),
-- // as well as other instructions that have side effects w.r.t the SP.
-- if (InsideCallSequence)
-- SPAdj += TII.getSPAdjust(I);
--
- MachineInstr *MI = I;
- bool DoIncr = true;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-@@ -854,6 +848,16 @@ void PEI::replaceFrameIndices(MachineBasicBlock *B
- break;
- }
-
-+ // If we are looking at a call sequence, we need to keep track of
-+ // the SP adjustment made by each instruction in the sequence.
-+ // This includes both the frame setup/destroy pseudos (handled above),
-+ // as well as other instructions that have side effects w.r.t the SP.
-+ // Note that this must come after eliminateFrameIndex, because
-+ // if I itself referred to a frame index, we shouldn't count its own
-+ // adjustment.
-+ if (MI && InsideCallSequence)
-+ SPAdj += TII.getSPAdjust(MI);
-+
- if (DoIncr && I != BB->end()) ++I;
-
- // Update register states.
-Index: lib/CodeGen/TargetFrameLoweringImpl.cpp
-===================================================================
---- lib/CodeGen/TargetFrameLoweringImpl.cpp
-+++ lib/CodeGen/TargetFrameLoweringImpl.cpp
-@@ -42,3 +42,8 @@ int TargetFrameLowering::getFrameIndexReference(co
- FrameReg = RI->getFrameRegister(MF);
- return getFrameIndexOffset(MF, FI);
- }
-+
-+bool TargetFrameLowering::needsFrameIndexResolution(
-+ const MachineFunction &MF) const {
-+ return MF.getFrameInfo()->hasStackObjects();
-+}
-Index: lib/Target/X86/CMakeLists.txt
-===================================================================
---- lib/Target/X86/CMakeLists.txt
-+++ lib/Target/X86/CMakeLists.txt
-@@ -14,6 +14,7 @@ add_public_tablegen_target(X86CommonTableGen)
-
- set(sources
- X86AsmPrinter.cpp
-+ X86CallFrameOptimization.cpp
- X86FastISel.cpp
- X86FloatingPoint.cpp
- X86FrameLowering.cpp
-Index: lib/Target/X86/X86.h
-===================================================================
---- lib/Target/X86/X86.h
-+++ lib/Target/X86/X86.h
-@@ -67,6 +67,11 @@ FunctionPass *createX86PadShortFunctions();
- /// to eliminate execution delays in some Atom processors.
- FunctionPass *createX86FixupLEAs();
-
-+/// createX86CallFrameOptimization - Return a pass that optimizes
-+/// the code-size of x86 call sequences. This is done by replacing
-+/// esp-relative movs with pushes.
-+FunctionPass *createX86CallFrameOptimization();
-+
- } // End llvm namespace
-
- #endif
-Index: lib/Target/X86/X86CallFrameOptimization.cpp
-===================================================================
---- lib/Target/X86/X86CallFrameOptimization.cpp
-+++ lib/Target/X86/X86CallFrameOptimization.cpp
-@@ -0,0 +1,400 @@
-+//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file defines a pass that optimizes call sequences on x86.
-+// Currently, it converts movs of function parameters onto the stack into
-+// pushes. This is beneficial for two main reasons:
-+// 1) The push instruction encoding is much smaller than an esp-relative mov
-+// 2) It is possible to push memory arguments directly. So, if the
-+// transformation is performed pre-reg-alloc, it can help relieve
-+// register pressure.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include <algorithm>
-+
-+#include "X86.h"
-+#include "X86InstrInfo.h"
-+#include "X86Subtarget.h"
-+#include "X86MachineFunctionInfo.h"
-+#include "llvm/ADT/Statistic.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/Passes.h"
-+#include "llvm/IR/Function.h"
-+#include "llvm/Support/Debug.h"
-+#include "llvm/Support/raw_ostream.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+
-+using namespace llvm;
-+
-+#define DEBUG_TYPE "x86-cf-opt"
-+
-+cl::opt<bool> NoX86CFOpt("no-x86-call-frame-opt",
-+ cl::desc("Avoid optimizing x86 call frames for size"),
-+ cl::init(false), cl::Hidden);
-+
-+namespace {
-+class X86CallFrameOptimization : public MachineFunctionPass {
-+public:
-+ X86CallFrameOptimization() : MachineFunctionPass(ID) {}
-+
-+ bool runOnMachineFunction(MachineFunction &MF) override;
-+
-+private:
-+ bool shouldPerformTransformation(MachineFunction &MF);
-+
-+ bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator I);
-+
-+ MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
-+ unsigned Reg);
-+
-+ const char *getPassName() const override {
-+ return "X86 Optimize Call Frame";
-+ }
-+
-+ const TargetInstrInfo *TII;
-+ const TargetFrameLowering *TFL;
-+ const MachineRegisterInfo *MRI;
-+ static char ID;
-+};
-+
-+char X86CallFrameOptimization::ID = 0;
-+}
-+
-+FunctionPass *llvm::createX86CallFrameOptimization() {
-+ return new X86CallFrameOptimization();
-+}
-+
-+// This checks whether the transformation is legal and profitable
-+bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF) {
-+ if (NoX86CFOpt.getValue())
-+ return false;
-+
-+ // We currently only support call sequences where *all* parameters
-+ // are passed on the stack.
-+ // No point in running this in 64-bit mode, since some arguments are
-+ // passed in-register in all common calling conventions, so the pattern
-+ // we're looking for will never match.
-+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
-+ if (STI.is64Bit())
-+ return false;
-+
-+ // You would expect straight-line code between call-frame setup and
-+ // call-frame destroy. You would be wrong. There are circumstances (e.g.
-+ // CMOV_GR8 expansion of a select that feeds a function call!) where we can
-+ // end up with the setup and the destroy in different basic blocks.
-+ // This is bad, and breaks SP adjustment.
-+ // So, check that all of the frames in the function are closed inside
-+ // the same block, and, for good measure, that there are no nested frames.
-+ int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
-+ int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
-+ for (MachineBasicBlock &BB : MF) {
-+ bool InsideFrameSequence = false;
-+ for (MachineInstr &MI : BB) {
-+ if (MI.getOpcode() == FrameSetupOpcode) {
-+ if (InsideFrameSequence)
-+ return false;
-+ InsideFrameSequence = true;
-+ }
-+ else if (MI.getOpcode() == FrameDestroyOpcode) {
-+ if (!InsideFrameSequence)
-+ return false;
-+ InsideFrameSequence = false;
-+ }
-+ }
-+
-+ if (InsideFrameSequence)
-+ return false;
-+ }
-+
-+ // Now that we know the transformation is legal, check if it is
-+ // profitable.
-+ // TODO: Add a heuristic that actually looks at the function,
-+ // and enable this for more cases.
-+
-+ // This transformation is always a win when we do not expect to have
-+ // a reserved call frame. Under other circumstances, it may be either
-+ // a win or a loss, and requires a heuristic.
-+ // For now, enable it only for the relatively clear win cases.
-+ bool CannotReserveFrame = MF.getFrameInfo()->hasVarSizedObjects();
-+ if (CannotReserveFrame)
-+ return true;
-+
-+ // For now, don't even try to evaluate the profitability when
-+ // not optimizing for size.
-+ AttributeSet FnAttrs = MF.getFunction()->getAttributes();
-+ bool OptForSize =
-+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
-+ Attribute::OptimizeForSize) ||
-+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
-+
-+ if (!OptForSize)
-+ return false;
-+
-+ // Stack re-alignment can make this unprofitable even in terms of size.
-+ // As mentioned above, a better heuristic is needed. For now, don't do this
-+ // when the required alignment is above 8. (4 would be the safe choice, but
-+ // some experimentation showed 8 is generally good).
-+ if (TFL->getStackAlignment() > 8)
-+ return false;
-+
-+ return true;
-+}
-+
-+bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
-+ TII = MF.getSubtarget().getInstrInfo();
-+ TFL = MF.getSubtarget().getFrameLowering();
-+ MRI = &MF.getRegInfo();
-+
-+ if (!shouldPerformTransformation(MF))
-+ return false;
-+
-+ int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
-+
-+ bool Changed = false;
-+
-+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
-+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
-+ if (I->getOpcode() == FrameSetupOpcode)
-+ Changed |= adjustCallSequence(MF, *BB, I);
-+
-+ return Changed;
-+}
-+
-+bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
-+ MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator I) {
-+
-+ // Check that this particular call sequence is amenable to the
-+ // transformation.
-+ const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
-+ MF.getSubtarget().getRegisterInfo());
-+ unsigned StackPtr = RegInfo.getStackRegister();
-+ int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
-+
-+ // We expect to enter this at the beginning of a call sequence
-+ assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
-+ MachineBasicBlock::iterator FrameSetup = I++;
-+
-+
-+ // For globals in PIC mode, we can have some LEAs here.
-+ // Ignore them, they don't bother us.
-+ // TODO: Extend this to something that covers more cases.
-+ while (I->getOpcode() == X86::LEA32r)
-+ ++I;
-+
-+ // We expect a copy instruction here.
-+ // TODO: The copy instruction is a lowering artifact.
-+ // We should also support a copy-less version, where the stack
-+ // pointer is used directly.
-+ if (!I->isCopy() || !I->getOperand(0).isReg())
-+ return false;
-+ MachineBasicBlock::iterator SPCopy = I++;
-+ StackPtr = SPCopy->getOperand(0).getReg();
-+
-+ // Scan the call setup sequence for the pattern we're looking for.
-+ // We only handle a simple case - a sequence of MOV32mi or MOV32mr
-+ // instructions, that push a sequence of 32-bit values onto the stack, with
-+ // no gaps between them.
-+ SmallVector<MachineInstr*, 4> MovVector(4, nullptr);
-+ unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
-+ if (MaxAdjust > 4)
-+ MovVector.resize(MaxAdjust, nullptr);
-+
-+ do {
-+ int Opcode = I->getOpcode();
-+ if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
-+ break;
-+
-+ // We only want movs of the form:
-+ // movl imm/r32, k(%esp)
-+ // If we run into something else, bail.
-+ // Note that AddrBaseReg may, counter to its name, not be a register,
-+ // but rather a frame index.
-+ // TODO: Support the fi case. This should probably work now that we
-+ // have the infrastructure to track the stack pointer within a call
-+ // sequence.
-+ if (!I->getOperand(X86::AddrBaseReg).isReg() ||
-+ (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
-+ !I->getOperand(X86::AddrScaleAmt).isImm() ||
-+ (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
-+ (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
-+ (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
-+ !I->getOperand(X86::AddrDisp).isImm())
-+ return false;
-+
-+ int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
-+ assert(StackDisp >= 0 && "Negative stack displacement when passing parameters");
-+
-+ // We really don't want to consider the unaligned case.
-+ if (StackDisp % 4)
-+ return false;
-+ StackDisp /= 4;
-+
-+ assert((size_t)StackDisp < MovVector.size() &&
-+ "Function call has more parameters than the stack is adjusted for.");
-+
-+ // If the same stack slot is being filled twice, something's fishy.
-+ if (MovVector[StackDisp] != nullptr)
-+ return false;
-+ MovVector[StackDisp] = I;
-+
-+ ++I;
-+ } while (I != MBB.end());
-+
-+ // We now expect the end of the sequence - a call and a stack adjust.
-+ if (I == MBB.end())
-+ return false;
-+
-+ // For PCrel calls, we expect an additional COPY of the basereg.
-+ // If we find one, skip it.
-+ if (I->isCopy()) {
-+ if (I->getOperand(1).getReg() ==
-+ MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg())
-+ ++I;
-+ else
-+ return false;
-+ }
-+
-+ if (!I->isCall())
-+ return false;
-+ MachineBasicBlock::iterator Call = I;
-+ if ((++I)->getOpcode() != FrameDestroyOpcode)
-+ return false;
-+
-+ // Now, go through the vector, and see that we don't have any gaps,
-+ // but only a series of 32-bit MOVs.
-+
-+ int64_t ExpectedDist = 0;
-+ auto MMI = MovVector.begin(), MME = MovVector.end();
-+ for (; MMI != MME; ++MMI, ExpectedDist += 4)
-+ if (*MMI == nullptr)
-+ break;
-+
-+ // If the call had no parameters, do nothing
-+ if (!ExpectedDist)
-+ return false;
-+
-+ // We are either at the last parameter, or a gap.
-+ // Make sure it's not a gap
-+ for (; MMI != MME; ++MMI)
-+ if (*MMI != nullptr)
-+ return false;
-+
-+ // Ok, we can in fact do the transformation for this call.
-+ // Do not remove the FrameSetup instruction, but adjust the parameters.
-+ // PEI will end up finalizing the handling of this.
-+ FrameSetup->getOperand(1).setImm(ExpectedDist);
-+
-+ DebugLoc DL = I->getDebugLoc();
-+ // Now, iterate through the vector in reverse order, and replace the movs
-+ // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
-+ // replace uses.
-+ for (int Idx = (ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
-+ MachineBasicBlock::iterator MOV = *MovVector[Idx];
-+ MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
-+ if (MOV->getOpcode() == X86::MOV32mi) {
-+ unsigned PushOpcode = X86::PUSHi32;
-+ // If the operand is a small (8-bit) immediate, we can use a
-+ // PUSH instruction with a shorter encoding.
-+ // Note that isImm() may fail even though this is a MOVmi, because
-+ // the operand can also be a symbol.
-+ if (PushOp.isImm()) {
-+ int64_t Val = PushOp.getImm();
-+ if (isInt<8>(Val))
-+ PushOpcode = X86::PUSH32i8;
-+ }
-+ BuildMI(MBB, Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
-+ } else {
-+ unsigned int Reg = PushOp.getReg();
-+
-+ // If PUSHrmm is not slow on this target, try to fold the source of the
-+ // push into the instruction.
-+ const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
-+ bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();
-+
-+ // Check that this is legal to fold. Right now, we're extremely
-+ // conservative about that.
-+ MachineInstr *DefMov = nullptr;
-+ if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
-+ MachineInstr *Push = BuildMI(MBB, Call, DL, TII->get(X86::PUSH32rmm));
-+
-+ unsigned NumOps = DefMov->getDesc().getNumOperands();
-+ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
-+ Push->addOperand(DefMov->getOperand(i));
-+
-+ DefMov->eraseFromParent();
-+ } else {
-+ BuildMI(MBB, Call, DL, TII->get(X86::PUSH32r)).addReg(Reg).getInstr();
-+ }
-+ }
-+
-+ MBB.erase(MOV);
-+ }
-+
-+ // The stack-pointer copy is no longer used in the call sequences.
-+ // There should not be any other users, but we can't commit to that, so:
-+ if (MRI->use_empty(SPCopy->getOperand(0).getReg()))
-+ SPCopy->eraseFromParent();
-+
-+ // Once we've done this, we need to make sure PEI doesn't assume a reserved
-+ // frame.
-+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-+ FuncInfo->setHasPushSequences(true);
-+
-+ return true;
-+}
-+
-+MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
-+ MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
-+ // Do an extremely restricted form of load folding.
-+ // ISel will often create patterns like:
-+ // movl 4(%edi), %eax
-+ // movl 8(%edi), %ecx
-+ // movl 12(%edi), %edx
-+ // movl %edx, 8(%esp)
-+ // movl %ecx, 4(%esp)
-+ // movl %eax, (%esp)
-+ // call
-+ // Get rid of those with prejudice.
-+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
-+ return nullptr;
-+
-+ // Make sure this is the only use of Reg.
-+ if (!MRI->hasOneNonDBGUse(Reg))
-+ return nullptr;
-+
-+ MachineBasicBlock::iterator DefMI = MRI->getVRegDef(Reg);
-+
-+ // Make sure the def is a MOV from memory.
-+ // If the def is in another block, give up.
-+ if (DefMI->getOpcode() != X86::MOV32rm ||
-+ DefMI->getParent() != FrameSetup->getParent())
-+ return nullptr;
-+
-+ // Be careful with movs that load from a stack slot, since it may get
-+ // resolved incorrectly.
-+ // TODO: Again, we already have the infrastructure, so this should work.
-+ if (!DefMI->getOperand(1).isReg())
-+ return nullptr;
-+
-+ // Now, make sure everything else up until the ADJCALLSTACK is a sequence
-+ // of MOVs. To be less conservative would require duplicating a lot of the
-+ // logic from PeepholeOptimizer.
-+ // FIXME: A possibly better approach would be to teach the PeepholeOptimizer
-+ // to be smarter about folding into pushes.
-+ for (auto I = DefMI; I != FrameSetup; ++I)
-+ if (I->getOpcode() != X86::MOV32rm)
-+ return nullptr;
-+
-+ return DefMI;
-+}
-Index: lib/Target/X86/X86FastISel.cpp
-===================================================================
---- lib/Target/X86/X86FastISel.cpp
-+++ lib/Target/X86/X86FastISel.cpp
-@@ -2735,7 +2735,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &
- // Issue CALLSEQ_START
- unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
-- .addImm(NumBytes);
-+ .addImm(NumBytes).addImm(0);
-
- // Walk the register/memloc assignments, inserting copies/loads.
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
-Index: lib/Target/X86/X86FrameLowering.cpp
-===================================================================
---- lib/Target/X86/X86FrameLowering.cpp
-+++ lib/Target/X86/X86FrameLowering.cpp
-@@ -38,9 +38,36 @@ using namespace llvm;
- extern cl::opt<bool> ForceStackAlign;
-
- bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
-- return !MF.getFrameInfo()->hasVarSizedObjects();
-+ return !MF.getFrameInfo()->hasVarSizedObjects() &&
-+ !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
- }
-
-+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
-+/// call frame pseudos can be simplified. Having a FP, as in the default
-+/// implementation, is not sufficient here since we can't always use it.
-+/// Use a more nuanced condition.
-+bool
-+X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
-+ const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>
-+ (MF.getSubtarget().getRegisterInfo());
-+ return hasReservedCallFrame(MF) ||
-+ (hasFP(MF) && !TRI->needsStackRealignment(MF))
-+ || TRI->hasBasePointer(MF);
-+}
-+
-+// needsFrameIndexResolution - Do we need to perform FI resolution for
-+// this function. Normally, this is required only when the function
-+// has any stack objects. However, FI resolution actually has another job,
-+// not apparent from the title - it resolves callframesetup/destroy
-+// that were not simplified earlier.
-+// So, this is required for x86 functions that have push sequences even
-+// when there are no stack objects.
-+bool
-+X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
-+ return MF.getFrameInfo()->hasStackObjects() ||
-+ MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
-+}
-+
- /// hasFP - Return true if the specified function should have a dedicated frame
- /// pointer register. This is true if the function has variable sized allocas
- /// or if frame pointer elimination is disabled.
-@@ -93,16 +120,6 @@ static unsigned getANDriOpcode(bool IsLP64, int64_
- return X86::AND32ri;
- }
-
--static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) {
-- // We don't support LP64 for now.
-- assert(!IsLP64);
--
-- if (MO.isImm() && isInt<8>(MO.getImm()))
-- return X86::PUSH32i8;
--
-- return X86::PUSHi32;;
--}
--
- static unsigned getLEArOpcode(unsigned IsLP64) {
- return IsLP64 ? X86::LEA64r : X86::LEA32r;
- }
-@@ -1882,100 +1899,6 @@ void X86FrameLowering::adjustForHiPEPrologue(Machi
- #endif
- }
-
--bool X86FrameLowering::
--convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB,
-- MachineBasicBlock::iterator I, uint64_t Amount) const {
-- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-- const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
-- MF.getSubtarget().getRegisterInfo());
-- unsigned StackPtr = RegInfo.getStackRegister();
--
-- // Scan the call setup sequence for the pattern we're looking for.
-- // We only handle a simple case now - a sequence of MOV32mi or MOV32mr
-- // instructions, that push a sequence of 32-bit values onto the stack, with
-- // no gaps.
-- std::map<int64_t, MachineBasicBlock::iterator> MovMap;
-- do {
-- int Opcode = I->getOpcode();
-- if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
-- break;
--
-- // We only want movs of the form:
-- // movl imm/r32, k(%ecx)
-- // If we run into something else, bail
-- // Note that AddrBaseReg may, counterintuitively, not be a register...
-- if (!I->getOperand(X86::AddrBaseReg).isReg() ||
-- (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
-- !I->getOperand(X86::AddrScaleAmt).isImm() ||
-- (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
-- (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
-- (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
-- !I->getOperand(X86::AddrDisp).isImm())
-- return false;
--
-- int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
--
-- // We don't want to consider the unaligned case.
-- if (StackDisp % 4)
-- return false;
--
-- // If the same stack slot is being filled twice, something's fishy.
-- if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second)
-- return false;
--
-- ++I;
-- } while (I != MBB.end());
--
-- // We now expect the end of the sequence - a call and a stack adjust.
-- if (I == MBB.end())
-- return false;
-- if (!I->isCall())
-- return false;
-- MachineBasicBlock::iterator Call = I;
-- if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode())
-- return false;
--
-- // Now, go through the map, and see that we don't have any gaps,
-- // but only a series of 32-bit MOVs.
-- // Since std::map provides ordered iteration, the original order
-- // of the MOVs doesn't matter.
-- int64_t ExpectedDist = 0;
-- for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME;
-- ++MMI, ExpectedDist += 4)
-- if (MMI->first != ExpectedDist)
-- return false;
--
-- // Ok, everything looks fine. Do the transformation.
-- DebugLoc DL = I->getDebugLoc();
--
-- // It's possible the original stack adjustment amount was larger than
-- // that done by the pushes. If so, we still need a SUB.
-- Amount -= ExpectedDist;
-- if (Amount) {
-- MachineInstr* Sub = BuildMI(MBB, Call, DL,
-- TII.get(getSUBriOpcode(false, Amount)), StackPtr)
-- .addReg(StackPtr).addImm(Amount);
-- Sub->getOperand(3).setIsDead();
-- }
--
-- // Now, iterate through the map in reverse order, and replace the movs
-- // with pushes. MOVmi/MOVmr doesn't have any defs, so need to replace uses.
-- for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) {
-- MachineBasicBlock::iterator MOV = MMI->second;
-- MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
--
-- // Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size
-- int PushOpcode = X86::PUSH32r;
-- if (MOV->getOpcode() == X86::MOV32mi)
-- PushOpcode = getPUSHiOpcode(false, PushOp);
--
-- BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp);
-- MBB.erase(MOV);
-- }
--
-- return true;
--}
--
- void X86FrameLowering::
- eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
-@@ -1990,7 +1913,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
- bool IsLP64 = STI.isTarget64BitLP64();
- DebugLoc DL = I->getDebugLoc();
- uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
-- uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
-+ uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
- I = MBB.erase(I);
-
- if (!reserveCallFrame) {
-@@ -2010,24 +1933,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
- Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
-
- MachineInstr *New = nullptr;
-- if (Opcode == TII.getCallFrameSetupOpcode()) {
-- // Try to convert movs to the stack into pushes.
-- // We currently only look for a pattern that appears in 32-bit
-- // calling conventions.
-- if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount))
-- return;
-
-- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
-- StackPtr)
-- .addReg(StackPtr)
-- .addImm(Amount);
-- } else {
-- assert(Opcode == TII.getCallFrameDestroyOpcode());
-+ // Factor out the amount that gets handled inside the sequence
-+ // (Pushes of argument for frame setup, callee pops for frame destroy)
-+ Amount -= InternalAmt;
-
-- // Factor out the amount the callee already popped.
-- Amount -= CalleeAmt;
-+ if (Amount) {
-+ if (Opcode == TII.getCallFrameSetupOpcode()) {
-+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
-+ .addReg(StackPtr).addImm(Amount);
-+ } else {
-+ assert(Opcode == TII.getCallFrameDestroyOpcode());
-
-- if (Amount) {
- unsigned Opc = getADDriOpcode(IsLP64, Amount);
- New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(Amount);
-@@ -2045,13 +1962,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
- return;
- }
-
-- if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
-+ if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back. We do this until we have
- // more advanced stack pointer tracking ability.
-- unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
-+ unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
- MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
-- .addReg(StackPtr).addImm(CalleeAmt);
-+ .addReg(StackPtr).addImm(InternalAmt);
-
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-Index: lib/Target/X86/X86FrameLowering.h
-===================================================================
---- lib/Target/X86/X86FrameLowering.h
-+++ lib/Target/X86/X86FrameLowering.h
-@@ -66,6 +66,8 @@ class X86FrameLowering : public TargetFrameLowerin
-
- bool hasFP(const MachineFunction &MF) const override;
- bool hasReservedCallFrame(const MachineFunction &MF) const override;
-+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
-+ bool needsFrameIndexResolution(const MachineFunction &MF) const override;
-
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
-Index: lib/Target/X86/X86InstrCompiler.td
-===================================================================
---- lib/Target/X86/X86InstrCompiler.td
-+++ lib/Target/X86/X86InstrCompiler.td
-@@ -43,9 +43,9 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses
- // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
- // sub / add which can clobber EFLAGS.
- let Defs = [ESP, EFLAGS], Uses = [ESP] in {
--def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
-+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKDOWN",
-- [(X86callseq_start timm:$amt)]>,
-+ []>,
- Requires<[NotLP64]>;
- def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKUP",
-@@ -52,7 +52,10 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins
- [(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[NotLP64]>;
- }
-+def : Pat<(X86callseq_start timm:$amt1),
-+ (ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>;
-
-+
- // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
- // a stack adjustment and the codegen must know that they may modify the stack
- // pointer before prolog-epilog rewriting occurs.
-@@ -59,9 +62,9 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins
- // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
- // sub / add which can clobber EFLAGS.
- let Defs = [RSP, EFLAGS], Uses = [RSP] in {
--def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
-+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKDOWN",
-- [(X86callseq_start timm:$amt)]>,
-+ []>,
- Requires<[IsLP64]>;
- def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKUP",
-@@ -68,9 +71,10 @@ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins
- [(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[IsLP64]>;
- }
-+def : Pat<(X86callseq_start timm:$amt1),
-+ (ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>;
-
-
--
- // x86-64 va_start lowering magic.
- let usesCustomInserter = 1, Defs = [EFLAGS] in {
- def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
-Index: lib/Target/X86/X86InstrInfo.cpp
-===================================================================
---- lib/Target/X86/X86InstrInfo.cpp
-+++ lib/Target/X86/X86InstrInfo.cpp
-@@ -1692,6 +1692,58 @@ X86InstrInfo::isCoalescableExtInstr(const MachineI
- return false;
- }
-
-+int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const {
-+ const MachineFunction *MF = MI->getParent()->getParent();
-+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
-+
-+ if (MI->getOpcode() == getCallFrameSetupOpcode() ||
-+ MI->getOpcode() == getCallFrameDestroyOpcode()) {
-+ unsigned StackAlign = TFI->getStackAlignment();
-+ int SPAdj = (MI->getOperand(0).getImm() + StackAlign - 1) / StackAlign *
-+ StackAlign;
-+
-+ SPAdj -= MI->getOperand(1).getImm();
-+
-+ if (MI->getOpcode() == getCallFrameSetupOpcode())
-+ return SPAdj;
-+ else
-+ return -SPAdj;
-+ }
-+
-+ // To know whether a call adjusts the stack, we need information
-+ // that is bound to the following ADJCALLSTACKUP pseudo.
-+ // Look for the next ADJCALLSTACKUP that follows the call.
-+ if (MI->isCall()) {
-+ const MachineBasicBlock* MBB = MI->getParent();
-+ auto I = ++MachineBasicBlock::const_iterator(MI);
-+ for (auto E = MBB->end(); I != E; ++I) {
-+ if (I->getOpcode() == getCallFrameDestroyOpcode() ||
-+ I->isCall())
-+ break;
-+ }
-+
-+ // If we could not find a frame destroy opcode, then it has already
-+ // been simplified, so we don't care.
-+ if (I->getOpcode() != getCallFrameDestroyOpcode())
-+ return 0;
-+
-+ return -(I->getOperand(1).getImm());
-+ }
-+
-+ // Currently handle only PUSHes we can reasonably expect to see
-+ // in call sequences
-+ switch (MI->getOpcode()) {
-+ default:
-+ return 0;
-+ case X86::PUSH32i8:
-+ case X86::PUSH32r:
-+ case X86::PUSH32rmm:
-+ case X86::PUSH32rmr:
-+ case X86::PUSHi32:
-+ return 4;
-+ }
-+}
-+
- /// isFrameOperand - Return true and the FrameIndex if the specified
- /// operand and follow operands form a reference to the stack frame.
- bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
-Index: lib/Target/X86/X86InstrInfo.h
-===================================================================
---- lib/Target/X86/X86InstrInfo.h
-+++ lib/Target/X86/X86InstrInfo.h
-@@ -175,6 +175,11 @@ class X86InstrInfo final : public X86GenInstrInfo
- ///
- const X86RegisterInfo &getRegisterInfo() const { return RI; }
-
-+ /// getSPAdjust - This returns the stack pointer adjustment made by
-+ /// this instruction. For x86, we need to handle more complex call
-+ /// sequences involving PUSHes.
-+ int getSPAdjust(const MachineInstr *MI) const override;
-+
- /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
- /// extension instruction. That is, it's like a copy where it's legal for the
- /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
-Index: lib/Target/X86/X86MachineFunctionInfo.h
-===================================================================
---- lib/Target/X86/X86MachineFunctionInfo.h
-+++ lib/Target/X86/X86MachineFunctionInfo.h
-@@ -77,6 +77,9 @@ class X86MachineFunctionInfo : public MachineFunct
- unsigned ArgumentStackSize;
- /// NumLocalDynamics - Number of local-dynamic TLS accesses.
- unsigned NumLocalDynamics;
-+ /// HasPushSequences - Keeps track of whether this function uses sequences
-+ /// of pushes to pass function parameters.
-+ bool HasPushSequences;
-
- private:
- /// ForwardedMustTailRegParms - A list of virtual and physical registers
-@@ -97,7 +100,8 @@ class X86MachineFunctionInfo : public MachineFunct
- VarArgsGPOffset(0),
- VarArgsFPOffset(0),
- ArgumentStackSize(0),
-- NumLocalDynamics(0) {}
-+ NumLocalDynamics(0),
-+ HasPushSequences(false) {}
-
- explicit X86MachineFunctionInfo(MachineFunction &MF)
- : ForceFramePointer(false),
-@@ -113,11 +117,15 @@ class X86MachineFunctionInfo : public MachineFunct
- VarArgsGPOffset(0),
- VarArgsFPOffset(0),
- ArgumentStackSize(0),
-- NumLocalDynamics(0) {}
-+ NumLocalDynamics(0),
-+ HasPushSequences(false) {}
-
- bool getForceFramePointer() const { return ForceFramePointer;}
- void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
-
-+ bool getHasPushSequences() const { return HasPushSequences; }
-+ void setHasPushSequences(bool HasPush) { HasPushSequences = HasPush; }
-+
- bool getRestoreBasePointer() const { return RestoreBasePointerOffset!=0; }
- void setRestoreBasePointer(const MachineFunction *MF);
- int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
-Index: lib/Target/X86/X86RegisterInfo.cpp
-===================================================================
---- lib/Target/X86/X86RegisterInfo.cpp
-+++ lib/Target/X86/X86RegisterInfo.cpp
-@@ -468,8 +468,6 @@ void
- X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
-- assert(SPAdj == 0 && "Unexpected");
--
- MachineInstr &MI = *II;
- MachineFunction &MF = *MI.getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-@@ -506,6 +504,9 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicB
- } else
- FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
-
-+ if (BasePtr == StackPtr)
-+ FIOffset += SPAdj;
-+
- // The frame index format for stackmaps and patchpoints is different from the
- // X86 format. It only has a FI and an offset.
- if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
-Index: lib/Target/X86/X86TargetMachine.cpp
-===================================================================
---- lib/Target/X86/X86TargetMachine.cpp
-+++ lib/Target/X86/X86TargetMachine.cpp
-@@ -154,6 +154,7 @@ class X86PassConfig : public TargetPassConfig {
- void addIRPasses() override;
- bool addInstSelector() override;
- bool addILPOpts() override;
-+ void addPreRegAlloc() override;
- void addPostRegAlloc() override;
- void addPreEmitPass() override;
- };
-@@ -187,6 +188,10 @@ bool X86PassConfig::addILPOpts() {
- return true;
- }
-
-+void X86PassConfig::addPreRegAlloc() {
-+ addPass(createX86CallFrameOptimization());
-+}
-+
- void X86PassConfig::addPostRegAlloc() {
- addPass(createX86FloatingPointStackifierPass());
- }
-Index: test/CodeGen/X86/inalloca-invoke.ll
-===================================================================
---- test/CodeGen/X86/inalloca-invoke.ll
-+++ test/CodeGen/X86/inalloca-invoke.ll
-@@ -31,7 +31,7 @@ blah:
- to label %invoke.cont unwind label %lpad
-
- ; Uses end as sret param.
--; CHECK: movl %[[end]], (%esp)
-+; CHECK: pushl %[[end]]
- ; CHECK: calll _plus
-
- invoke.cont:
-Index: test/CodeGen/X86/movtopush.ll
-===================================================================
---- test/CodeGen/X86/movtopush.ll
-+++ test/CodeGen/X86/movtopush.ll
-@@ -1,10 +1,12 @@
- ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
-+; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
- ; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
-+
- declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
- declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
-
- ; Here, we should have a reserved frame, so we don't expect pushes
--; NORMAL-LABEL: test1
-+; NORMAL-LABEL: test1:
- ; NORMAL: subl $16, %esp
- ; NORMAL-NEXT: movl $4, 12(%esp)
- ; NORMAL-NEXT: movl $3, 8(%esp)
-@@ -11,6 +13,7 @@ declare void @inreg(i32 %a, i32 inreg %b, i32 %c,
- ; NORMAL-NEXT: movl $2, 4(%esp)
- ; NORMAL-NEXT: movl $1, (%esp)
- ; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
- define void @test1() {
- entry:
- call void @good(i32 1, i32 2, i32 3, i32 4)
-@@ -17,8 +20,10 @@ entry:
- ret void
- }
-
--; Here, we expect a sequence of 4 immediate pushes
--; NORMAL-LABEL: test2
-+; We're optimizing for code size, so we should get pushes for x86,
-+; even though there is a reserved call frame.
-+; Make sure we don't touch x86-64
-+; NORMAL-LABEL: test1b:
- ; NORMAL-NOT: subl {{.*}} %esp
- ; NORMAL: pushl $4
- ; NORMAL-NEXT: pushl $3
-@@ -25,6 +30,42 @@ entry:
- ; NORMAL-NEXT: pushl $2
- ; NORMAL-NEXT: pushl $1
- ; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+; X64-LABEL: test1b:
-+; X64: movl $1, %ecx
-+; X64-NEXT: movl $2, %edx
-+; X64-NEXT: movl $3, %r8d
-+; X64-NEXT: movl $4, %r9d
-+; X64-NEXT: callq good
-+define void @test1b() optsize {
-+entry:
-+ call void @good(i32 1, i32 2, i32 3, i32 4)
-+ ret void
-+}
-+
-+; Same as above, but for minsize
-+; NORMAL-LABEL: test1c:
-+; NORMAL-NOT: subl {{.*}} %esp
-+; NORMAL: pushl $4
-+; NORMAL-NEXT: pushl $3
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test1c() minsize {
-+entry:
-+ call void @good(i32 1, i32 2, i32 3, i32 4)
-+ ret void
-+}
-+
-+; If we have a reserved frame, we should have pushes
-+; NORMAL-LABEL: test2:
-+; NORMAL-NOT: subl {{.*}} %esp
-+; NORMAL: pushl $4
-+; NORMAL-NEXT: pushl $3
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: call
- define void @test2(i32 %k) {
- entry:
- %a = alloca i32, i32 %k
-@@ -34,7 +75,7 @@ entry:
-
- ; Again, we expect a sequence of 4 immediate pushes
- ; Checks that we generate the right pushes for >8bit immediates
--; NORMAL-LABEL: test2b
-+; NORMAL-LABEL: test2b:
- ; NORMAL-NOT: subl {{.*}} %esp
- ; NORMAL: pushl $4096
- ; NORMAL-NEXT: pushl $3072
-@@ -41,15 +82,15 @@ entry:
- ; NORMAL-NEXT: pushl $2048
- ; NORMAL-NEXT: pushl $1024
- ; NORMAL-NEXT: call
--define void @test2b(i32 %k) {
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test2b() optsize {
- entry:
-- %a = alloca i32, i32 %k
- call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
- ret void
- }
-
- ; The first push should push a register
--; NORMAL-LABEL: test3
-+; NORMAL-LABEL: test3:
- ; NORMAL-NOT: subl {{.*}} %esp
- ; NORMAL: pushl $4
- ; NORMAL-NEXT: pushl $3
-@@ -56,15 +97,15 @@ entry:
- ; NORMAL-NEXT: pushl $2
- ; NORMAL-NEXT: pushl %e{{..}}
- ; NORMAL-NEXT: call
--define void @test3(i32 %k) {
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test3(i32 %k) optsize {
- entry:
-- %a = alloca i32, i32 %k
- call void @good(i32 %k, i32 2, i32 3, i32 4)
- ret void
- }
-
- ; We don't support weird calling conventions
--; NORMAL-LABEL: test4
-+; NORMAL-LABEL: test4:
- ; NORMAL: subl $12, %esp
- ; NORMAL-NEXT: movl $4, 8(%esp)
- ; NORMAL-NEXT: movl $3, 4(%esp)
-@@ -71,16 +112,16 @@ entry:
- ; NORMAL-NEXT: movl $1, (%esp)
- ; NORMAL-NEXT: movl $2, %eax
- ; NORMAL-NEXT: call
--define void @test4(i32 %k) {
-+; NORMAL-NEXT: addl $12, %esp
-+define void @test4() optsize {
- entry:
-- %a = alloca i32, i32 %k
- call void @inreg(i32 1, i32 2, i32 3, i32 4)
- ret void
- }
-
--; Check that additional alignment is added when the pushes
--; don't add up to the required alignment.
--; ALIGNED-LABEL: test5
-+; When there is no reserved call frame, check that additional alignment
-+; is added when the pushes don't add up to the required alignment.
-+; ALIGNED-LABEL: test5:
- ; ALIGNED: subl $16, %esp
- ; ALIGNED-NEXT: pushl $4
- ; ALIGNED-NEXT: pushl $3
-@@ -97,7 +138,7 @@ entry:
- ; Check that pushing the addresses of globals (Or generally, things that
- ; aren't exactly immediates) isn't broken.
- ; Fixes PR21878.
--; NORMAL-LABEL: test6
-+; NORMAL-LABEL: test6:
- ; NORMAL: pushl $_ext
- ; NORMAL-NEXT: call
- declare void @f(i8*)
-@@ -110,3 +151,108 @@ bb:
- alloca i32
- ret void
- }
-+
-+; Check that we fold simple cases into the push
-+; NORMAL-LABEL: test7:
-+; NORMAL-NOT: subl {{.*}} %esp
-+; NORMAL: movl 4(%esp), [[EAX:%e..]]
-+; NORMAL-NEXT: pushl $4
-+; NORMAL-NEXT: pushl ([[EAX]])
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test7(i32* %ptr) optsize {
-+entry:
-+ %val = load i32* %ptr
-+ call void @good(i32 1, i32 2, i32 %val, i32 4)
-+ ret void
-+}
-+
-+; But we don't want to fold stack-relative loads into the push,
-+; because the offset will be wrong
-+; NORMAL-LABEL: test8:
-+; NORMAL-NOT: subl {{.*}} %esp
-+; NORMAL: movl 4(%esp), [[EAX:%e..]]
-+; NORMAL-NEXT: pushl $4
-+; NORMAL-NEXT: pushl [[EAX]]
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test8(i32* %ptr) optsize {
-+entry:
-+ %val = ptrtoint i32* %ptr to i32
-+ call void @good(i32 1, i32 2, i32 %val, i32 4)
-+ ret void
-+}
-+
-+; If one function is using push instructions, and the other isn't
-+; (because it has frame-index references), then we must resolve
-+; these references correctly.
-+; NORMAL-LABEL: test9:
-+; NORMAL-NOT: leal (%esp),
-+; NORMAL: pushl $4
-+; NORMAL-NEXT: pushl $3
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+; NORMAL-NEXT: subl $16, %esp
-+; NORMAL-NEXT: leal 16(%esp), [[EAX:%e..]]
-+; NORMAL-NEXT: movl [[EAX]], 12(%esp)
-+; NORMAL-NEXT: movl $7, 8(%esp)
-+; NORMAL-NEXT: movl $6, 4(%esp)
-+; NORMAL-NEXT: movl $5, (%esp)
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test9() optsize {
-+entry:
-+ %p = alloca i32, align 4
-+ call void @good(i32 1, i32 2, i32 3, i32 4)
-+ %0 = ptrtoint i32* %p to i32
-+ call void @good(i32 5, i32 6, i32 7, i32 %0)
-+ ret void
-+}
-+
-+; We can end up with an indirect call which gets reloaded on the spot.
-+; Make sure we reference the correct stack slot - we spill into (%esp)
-+; and reload from 16(%esp) due to the pushes.
-+; NORMAL-LABEL: test10:
-+; NORMAL: movl $_good, [[ALLOC:.*]]
-+; NORMAL-NEXT: movl [[ALLOC]], [[EAX:%e..]]
-+; NORMAL-NEXT: movl [[EAX]], (%esp) # 4-byte Spill
-+; NORMAL: nop
-+; NORMAL: pushl $4
-+; NORMAL-NEXT: pushl $3
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: calll *16(%esp)
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test10() optsize {
-+ %stack_fptr = alloca void (i32, i32, i32, i32)*
-+ store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
-+ %good_ptr = load volatile void (i32, i32, i32, i32)** %stack_fptr
-+ call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
-+ call void (i32, i32, i32, i32)* %good_ptr(i32 1, i32 2, i32 3, i32 4)
-+ ret void
-+}
-+
-+; We can't fold the load from the global into the push because of
-+; interference from the store
-+; NORMAL-LABEL: test11:
-+; NORMAL: movl _the_global, [[EAX:%e..]]
-+; NORMAL-NEXT: movl $42, _the_global
-+; NORMAL-NEXT: pushl $4
-+; NORMAL-NEXT: pushl $3
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl [[EAX]]
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+@the_global = external global i32
-+define void @test11() optsize {
-+ %myload = load i32* @the_global
-+ store i32 42, i32* @the_global
-+ call void @good(i32 %myload, i32 2, i32 3, i32 4)
-+ ret void
-+}
OpenPOWER on IntegriCloud