path: root/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
author    dim <dim@FreeBSD.org>  2016-12-26 20:36:37 +0000
committer dim <dim@FreeBSD.org>  2016-12-26 20:36:37 +0000
commit    06210ae42d418d50d8d9365d5c9419308ae9e7ee (patch)
tree      ab60b4cdd6e430dda1f292a46a77ddb744723f31 /contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
parent    2dd166267f53df1c3748b4325d294b9b839de74b (diff)
MFC r309124:

Upgrade our copies of clang, llvm, lldb, compiler-rt and libc++ to 3.9.0 release, and add lld 3.9.0. Also completely revamp the build system for clang, llvm, lldb and their related tools.

Please note that from 3.5.0 onwards, clang, llvm and lldb require C++11 support to build; see UPDATING for more information.

Release notes for llvm, clang and lld are available here:
<http://llvm.org/releases/3.9.0/docs/ReleaseNotes.html>
<http://llvm.org/releases/3.9.0/tools/clang/docs/ReleaseNotes.html>
<http://llvm.org/releases/3.9.0/tools/lld/docs/ReleaseNotes.html>

Thanks to Ed Maste, Bryan Drewery, Andrew Turner, Antoine Brodin and Jan Beich for their help.

Relnotes:	yes

MFC r309147:

Pull in r282174 from upstream llvm trunk (by Krzysztof Parzyszek):

  [PPC] Set SP after loading data from stack frame, if no red zone is present

  Follow-up to r280705: Make sure that the SP is only restored after all data
  is loaded from the stack frame, if there is no red zone.

  This completes the fix for https://llvm.org/bugs/show_bug.cgi?id=26519.

  Differential Revision: https://reviews.llvm.org/D24466

Reported by:	Mark Millard
PR:		214433

MFC r309149:

Pull in r283060 from upstream llvm trunk (by Hal Finkel):

  [PowerPC] Refactor soft-float support, and enable PPC64 soft float

  This change enables soft-float for PowerPC64, and also makes soft-float
  disable all vector instruction sets for both 32-bit and 64-bit modes. This
  latter part is necessary because the PPC backend canonicalizes many Altivec
  vector types to floating-point types, and so soft-float breaks
  scalarization support for many operations. Both for embedded targets and
  for operating-system kernels desiring soft-float support, it seems
  reasonable that disabling hardware floating-point also disables vector
  instructions (embedded targets without hardware floating point support are
  unlikely to have Altivec, etc. and operating system kernels desiring not
  to use floating-point registers to lower syscall cost are unlikely to want
  to use vector registers either). If someone needs this to work, we'll need
  to change the fact that we promote many Altivec operations to act on
  v4f32.

  To make it possible to disable Altivec when soft-float is enabled,
  hardware floating-point support needs to be expressed as a positive
  feature, like the others, and not a negative feature, because target
  features cannot have dependencies on the disabling of some other feature.
  So +soft-float has now become -hard-float.

  Fixes PR26970.

Pull in r283061 from upstream clang trunk (by Hal Finkel):

  [PowerPC] Enable soft-float for PPC64, and +soft-float -> -hard-float

  Enable soft-float support on PPC64, as the backend now supports it. Also,
  the backend now uses -hard-float instead of +soft-float, so set the target
  features accordingly.

  Fixes PR26970.

Reported by:	Mark Millard
PR:		214433

MFC r309212:

Add a few missed clang 3.9.0 files to OptionalObsoleteFiles.

MFC r309262:

Fix packaging for clang, lldb and lld 3.9.0

During the upgrade of clang/llvm etc to 3.9.0 in r309124, the PACKAGE directive in the usr.bin/clang/*.mk files got dropped accidentally. Restore it, with a few minor changes and additions:

* Correct license in clang.ucl to NCSA
* Add PACKAGE=clang for clang and most of the "ll" tools
* Put lldb in its own package
* Put lld in its own package

Reviewed by:	gjb, jmallett
Differential Revision:	https://reviews.freebsd.org/D8666

MFC r309656:

During the bootstrap phase, when building the minimal llvm library on PowerPC, add lib/Support/Atomic.cpp. This is needed because upstream llvm revision r271821 disabled the use of std::call_once, which causes some fallback functions from Atomic.cpp to be used instead.

Reported by:	Mark Millard
PR:		214902

MFC r309835:

Tentatively apply https://reviews.llvm.org/D18730 to work around gcc PR 70528 (bogus error: constructor required before non-static data member). This should fix buildworld with the external gcc package.

Reported by:	https://jenkins.freebsd.org/job/FreeBSD_HEAD_amd64_gcc/

MFC r310194:

Upgrade our copies of clang, llvm, lld, lldb, compiler-rt and libc++ to 3.9.1 release.

Please note that from 3.5.0 onwards, clang, llvm and lldb require C++11 support to build; see UPDATING for more information.

Release notes for llvm, clang and lld will be available here:
<http://releases.llvm.org/3.9.1/docs/ReleaseNotes.html>
<http://releases.llvm.org/3.9.1/tools/clang/docs/ReleaseNotes.html>
<http://releases.llvm.org/3.9.1/tools/lld/docs/ReleaseNotes.html>

Relnotes:	yes
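A minimal sketch of the +soft-float -> -hard-float direction described in MFC r309149 (illustrative only; the buildPPCFeatures helper and its names are hypothetical, not the actual clang code):

    #include <string>
    #include <vector>

    // Hypothetical helper mirroring the direction of the change: hard-float
    // is now a positive target feature, so requesting soft float subtracts
    // it; previously soft float was added as "+soft-float".
    std::vector<std::string> buildPPCFeatures(bool UseSoftFloat) {
      std::vector<std::string> Features;
      Features.push_back(UseSoftFloat ? "-hard-float" : "+hard-float");
      // Expressing the feature positively lets disabling it also disable
      // dependent vector features such as Altivec.
      return Features;
    }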
Diffstat (limited to 'contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp')
-rw-r--r--	contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp | 722
1 file changed, 654 insertions, 68 deletions
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 537c147..0aa3b62 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -23,9 +23,12 @@
#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
@@ -43,12 +46,13 @@ class WebAssemblyRegStackify final : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<MachineBlockFrequencyInfo>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
- AU.addPreservedID(MachineDominatorsID);
AU.addPreservedID(LiveVariablesID);
+ AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -82,17 +86,197 @@ static void ImposeStackOrdering(MachineInstr *MI) {
/*isImp=*/true));
}
+// Determine whether a call to the callee referenced by
+// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
+// effects.
+static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
+ bool &Write, bool &Effects, bool &StackPointer) {
+ // All calls can use the stack pointer.
+ StackPointer = true;
+
+ const MachineOperand &MO = MI.getOperand(CalleeOpNo);
+ if (MO.isGlobal()) {
+ const Constant *GV = MO.getGlobal();
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (!GA->isInterposable())
+ GV = GA->getAliasee();
+
+ if (const Function *F = dyn_cast<Function>(GV)) {
+ if (!F->doesNotThrow())
+ Effects = true;
+ if (F->doesNotAccessMemory())
+ return;
+ if (F->onlyReadsMemory()) {
+ Read = true;
+ return;
+ }
+ }
+ }
+
+ // Assume the worst.
+ Write = true;
+ Read = true;
+ Effects = true;
+}
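// Hedged summary of the attribute mapping above, in IR terms (illustrative
// only, not part of the actual pass):
//   declare i32 @f() readnone nounwind  -> only StackPointer is set
//   declare i32 @g() readonly nounwind  -> Read (and StackPointer)
//   declare i32 @h() readonly           -> Read + Effects (it may unwind)
//   any other callee                    -> Read + Write + Effects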
+
+// Determine whether MI reads memory, writes memory, has side effects,
+// and/or uses the __stack_pointer value.
+static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
+ bool &Write, bool &Effects, bool &StackPointer) {
+ assert(!MI.isPosition());
+ assert(!MI.isTerminator());
+
+ if (MI.isDebugValue())
+ return;
+
+ // Check for loads.
+ if (MI.mayLoad() && !MI.isInvariantLoad(&AA))
+ Read = true;
+
+ // Check for stores.
+ if (MI.mayStore()) {
+ Write = true;
+
+ // Check for stores to __stack_pointer.
+ for (auto MMO : MI.memoperands()) {
+ const MachinePointerInfo &MPI = MMO->getPointerInfo();
+ if (MPI.V.is<const PseudoSourceValue *>()) {
+ auto PSV = MPI.V.get<const PseudoSourceValue *>();
+ if (const ExternalSymbolPseudoSourceValue *EPSV =
+ dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
+ if (StringRef(EPSV->getSymbol()) == "__stack_pointer")
+ StackPointer = true;
+ }
+ }
+ } else if (MI.hasOrderedMemoryRef()) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
+ case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
+ case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
+ case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
+ case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
+ case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
+ case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
+ case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+ // These instructions have hasUnmodeledSideEffects() returning true
+ // because they trap on overflow and invalid, so they can't be arbitrarily
+ // moved; however, hasOrderedMemoryRef() interprets this plus their lack
+ // of memoperands as having a potential unknown memory reference.
+ break;
+ default:
+ // Record volatile accesses, unless it's a call, as calls are handled
+ // specially below.
+ if (!MI.isCall()) {
+ Write = true;
+ Effects = true;
+ }
+ break;
+ }
+ }
+
+ // Check for side effects.
+ if (MI.hasUnmodeledSideEffects()) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
+ case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
+ case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
+ case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
+ case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
+ case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
+ case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
+ case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
+ // These instructions have hasUnmodeledSideEffects() returning true
+ // because they trap on overflow and invalid, so they can't be arbitrarily
+ // moved; however, in the specific case of register stackifying, it is safe
+ // to move them because overflow and invalid are Undefined Behavior.
+ break;
+ default:
+ Effects = true;
+ break;
+ }
+ }
+
+ // Analyze calls.
+ if (MI.isCall()) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::CALL_VOID:
+ case WebAssembly::CALL_INDIRECT_VOID:
+ QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
+ break;
+ case WebAssembly::CALL_I32: case WebAssembly::CALL_I64:
+ case WebAssembly::CALL_F32: case WebAssembly::CALL_F64:
+ case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64:
+ case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64:
+ QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
+ break;
+ default:
+ llvm_unreachable("unexpected call opcode");
+ }
+ }
+}
+
+// Test whether Def is safe and profitable to rematerialize.
+static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
+ const WebAssemblyInstrInfo *TII) {
+ return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
+}
+
+// Identify the definition for this register at this point. This is a
+// generalization of MachineRegisterInfo::getUniqueVRegDef that uses
+// LiveIntervals to handle complex cases.
+static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
+ const MachineRegisterInfo &MRI,
+ const LiveIntervals &LIS) {
+ // Most registers are in SSA form here so we try a quick MRI query first.
+ if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
+ return Def;
+
+ // MRI doesn't know what the Def is. Try asking LIS.
+ if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
+ LIS.getInstructionIndex(*Insert)))
+ return LIS.getInstructionFromIndex(ValNo->def);
+
+ return nullptr;
+}
+
+// Test whether Reg, as defined at Def, has exactly one use. This is a
+// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
+// to handle complex cases.
+static bool HasOneUse(unsigned Reg, MachineInstr *Def,
+ MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
+ LiveIntervals &LIS) {
+ // Most registers are in SSA form here so we try a quick MRI query first.
+ if (MRI.hasOneUse(Reg))
+ return true;
+
+ bool HasOne = false;
+ const LiveInterval &LI = LIS.getInterval(Reg);
+ const VNInfo *DefVNI = LI.getVNInfoAt(
+ LIS.getInstructionIndex(*Def).getRegSlot());
+ assert(DefVNI);
+ for (auto &I : MRI.use_nodbg_operands(Reg)) {
+ const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
+ if (Result.valueIn() == DefVNI) {
+ if (!Result.isKill())
+ return false;
+ if (HasOne)
+ return false;
+ HasOne = true;
+ }
+ }
+ return HasOne;
+}
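// Hedged note: registers stop being strictly SSA at this point once the
// tee_local transformation below (MoveAndTeeForMultiUse) adds a second
// definition of Reg, which is why "one use" must be evaluated per value
// number rather than per register.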
+
// Test whether it's safe to move Def to just before Insert.
// TODO: Compute memory dependencies in a way that doesn't require always
// walking the block.
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
- AliasAnalysis &AA, LiveIntervals &LIS,
- MachineRegisterInfo &MRI) {
+ AliasAnalysis &AA, const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI) {
assert(Def->getParent() == Insert->getParent());
- bool SawStore = false, SawSideEffects = false;
- MachineBasicBlock::const_iterator D(Def), I(Insert);
// Check for register dependencies.
for (const MachineOperand &MO : Def->operands()) {
@@ -106,6 +290,10 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
continue;
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
+ // from moving down, and we've already checked for that.
+ if (Reg == WebAssembly::ARGUMENTS)
+ continue;
// If the physical register is never modified, ignore it.
if (!MRI.isPhysRegModified(Reg))
continue;
@@ -114,24 +302,404 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
}
// Ask LiveIntervals whether moving this virtual register use or def to
- // Insert will change value numbers are seen.
+ // Insert will change which value numbers are seen.
+ //
+ // If the operand is a use of a register that is also defined in the same
+ // instruction, test that the newly defined value reaches the insert point,
+ // since the operand will be moving along with the def.
const LiveInterval &LI = LIS.getInterval(Reg);
- VNInfo *DefVNI = MO.isDef() ?
- LI.getVNInfoAt(LIS.getInstructionIndex(Def).getRegSlot()) :
- LI.getVNInfoBefore(LIS.getInstructionIndex(Def));
+ VNInfo *DefVNI =
+ (MO.isDef() || Def->definesRegister(Reg)) ?
+ LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot()) :
+ LI.getVNInfoBefore(LIS.getInstructionIndex(*Def));
assert(DefVNI && "Instruction input missing value number");
- VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(Insert));
+ VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*Insert));
if (InsVNI && DefVNI != InsVNI)
return false;
}
- // Check for memory dependencies and side effects.
- for (--I; I != D; --I)
- SawSideEffects |= I->isSafeToMove(&AA, SawStore);
- return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
- !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
+ bool Read = false, Write = false, Effects = false, StackPointer = false;
+ Query(*Def, AA, Read, Write, Effects, StackPointer);
+
+ // If the instruction does not access memory and has no side effects, it has
+ // no additional dependencies.
+ if (!Read && !Write && !Effects && !StackPointer)
+ return true;
+
+ // Scan through the intervening instructions between Def and Insert.
+ MachineBasicBlock::const_iterator D(Def), I(Insert);
+ for (--I; I != D; --I) {
+ bool InterveningRead = false;
+ bool InterveningWrite = false;
+ bool InterveningEffects = false;
+ bool InterveningStackPointer = false;
+ Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
+ InterveningStackPointer);
+ if (Effects && InterveningEffects)
+ return false;
+ if (Read && InterveningWrite)
+ return false;
+ if (Write && (InterveningRead || InterveningWrite))
+ return false;
+ if (StackPointer && InterveningStackPointer)
+ return false;
+ }
+
+ return true;
+}
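// A hedged distillation of the scan above as a standalone predicate
// (illustrative only, not part of the actual pass): moving Def past one
// intervening instruction I is blocked exactly when their effects conflict.
static bool EffectsConflict(bool DefRead, bool DefWrite, bool DefEffects,
                            bool DefStackPointer, bool IRead, bool IWrite,
                            bool IEffects, bool IStackPointer) {
  return (DefEffects && IEffects) ||         // side effects can't reorder
         (DefRead && IWrite) ||              // the read would cross a write
         (DefWrite && (IRead || IWrite)) ||  // the write would cross an access
         (DefStackPointer && IStackPointer); // both touch __stack_pointer
}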
+
+/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
+static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
+ const MachineBasicBlock &MBB,
+ const MachineRegisterInfo &MRI,
+ const MachineDominatorTree &MDT,
+ LiveIntervals &LIS,
+ WebAssemblyFunctionInfo &MFI) {
+ const LiveInterval &LI = LIS.getInterval(Reg);
+
+ const MachineInstr *OneUseInst = OneUse.getParent();
+ VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
+
+ for (const MachineOperand &Use : MRI.use_operands(Reg)) {
+ if (&Use == &OneUse)
+ continue;
+
+ const MachineInstr *UseInst = Use.getParent();
+ VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
+
+ if (UseVNI != OneUseVNI)
+ continue;
+
+ if (UseInst == OneUseInst) {
+ // Another use in the same instruction. We need to ensure that the one
+ // selected use happens "before" it.
+ if (&OneUse > &Use)
+ return false;
+ } else {
+ // Test that the use is dominated by the one selected use.
+ while (!MDT.dominates(OneUseInst, UseInst)) {
+ // Actually, dominating is over-conservative. Test that the use would
+ // happen after the one selected use in the stack evaluation order.
+ //
+ // This is needed as a consequence of using implicit get_locals for
+ // uses and implicit set_locals for defs.
+ if (UseInst->getDesc().getNumDefs() == 0)
+ return false;
+ const MachineOperand &MO = UseInst->getOperand(0);
+ if (!MO.isReg())
+ return false;
+ unsigned DefReg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
+ !MFI.isVRegStackified(DefReg))
+ return false;
+ assert(MRI.hasOneUse(DefReg));
+ const MachineOperand &NewUse = *MRI.use_begin(DefReg);
+ const MachineInstr *NewUseInst = NewUse.getParent();
+ if (NewUseInst == OneUseInst) {
+ if (&OneUse > &NewUse)
+ return false;
+ break;
+ }
+ UseInst = NewUseInst;
+ }
+ }
+ }
+ return true;
+}
+
+/// Get the appropriate tee_local opcode for the given register class.
+static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) {
+ if (RC == &WebAssembly::I32RegClass)
+ return WebAssembly::TEE_LOCAL_I32;
+ if (RC == &WebAssembly::I64RegClass)
+ return WebAssembly::TEE_LOCAL_I64;
+ if (RC == &WebAssembly::F32RegClass)
+ return WebAssembly::TEE_LOCAL_F32;
+ if (RC == &WebAssembly::F64RegClass)
+ return WebAssembly::TEE_LOCAL_F64;
+ llvm_unreachable("Unexpected register class");
+}
+
+// Shrink LI to its uses, cleaning up LI.
+static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
+ if (LIS.shrinkToUses(&LI)) {
+ SmallVector<LiveInterval*, 4> SplitLIs;
+ LIS.splitSeparateComponents(LI, SplitLIs);
+ }
+}
+
+/// A single-use def in the same block with no intervening memory or register
+/// dependencies; move the def down and nest it with the current instruction.
+static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
+ MachineInstr *Def,
+ MachineBasicBlock &MBB,
+ MachineInstr *Insert, LiveIntervals &LIS,
+ WebAssemblyFunctionInfo &MFI,
+ MachineRegisterInfo &MRI) {
+ DEBUG(dbgs() << "Move for single use: "; Def->dump());
+
+ MBB.splice(Insert, &MBB, Def);
+ LIS.handleMove(*Def);
+
+ if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
+ // No one else is using this register for anything so we can just stackify
+ // it in place.
+ MFI.stackifyVReg(Reg);
+ } else {
+ // The register may have unrelated uses or defs; create a new register for
+ // just our one def and use so that we can stackify it.
+ unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+ Def->getOperand(0).setReg(NewReg);
+ Op.setReg(NewReg);
+
+ // Tell LiveIntervals about the new register.
+ LIS.createAndComputeVirtRegInterval(NewReg);
+
+ // Tell LiveIntervals about the changes to the old register.
+ LiveInterval &LI = LIS.getInterval(Reg);
+ LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
+ LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
+ /*RemoveDeadValNo=*/true);
+
+ MFI.stackifyVReg(NewReg);
+
+ DEBUG(dbgs() << " - Replaced register: "; Def->dump());
+ }
+
+ ImposeStackOrdering(Def);
+ return Def;
+}
+
+/// A trivially cloneable instruction; clone it and nest the new copy with the
+/// current instruction.
+static MachineInstr *RematerializeCheapDef(
+ unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
+ WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
+ const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
+ DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
+ DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());
+
+ unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+ TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
+ Op.setReg(NewReg);
+ MachineInstr *Clone = &*std::prev(Insert);
+ LIS.InsertMachineInstrInMaps(*Clone);
+ LIS.createAndComputeVirtRegInterval(NewReg);
+ MFI.stackifyVReg(NewReg);
+ ImposeStackOrdering(Clone);
+
+ DEBUG(dbgs() << " - Cloned to "; Clone->dump());
+
+ // Shrink the interval.
+ bool IsDead = MRI.use_empty(Reg);
+ if (!IsDead) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ ShrinkToUses(LI, LIS);
+ IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
+ }
+
+ // If that was the last use of the original, delete the original.
+ if (IsDead) {
+ DEBUG(dbgs() << " - Deleting original\n");
+ SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
+ LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
+ LIS.removeInterval(Reg);
+ LIS.RemoveMachineInstrFromMaps(Def);
+ Def.eraseFromParent();
+ }
+
+ return Clone;
}
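// Hedged illustration of why rematerialization is preferred over tee_local
// for cheap defs such as constants (the register names are hypothetical):
//
//   Before:               After repeated RematerializeCheapDef:
//     %r = CONST_I32 5      %n1 = CONST_I32 5   ; stackified
//     use1 %r               use1 %n1
//     use2 %r               %n2 = CONST_I32 5   ; stackified clone
//                           use2 %n2
//
// Each use gets its own stackified def, so no get_local or tee_local is
// needed to carry the value between uses.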
+/// A multiple-use def in the same block with no intervening memory or register
+/// dependencies; move the def down, nest it with the current instruction, and
+/// insert a tee_local to satisfy the rest of the uses. As an illustration,
+/// rewrite this:
+///
+/// Reg = INST ... // Def
+/// INST ..., Reg, ... // Insert
+/// INST ..., Reg, ...
+/// INST ..., Reg, ...
+///
+/// to this:
+///
+/// DefReg = INST ... // Def (to become the new Insert)
+/// TeeReg, Reg = TEE_LOCAL_... DefReg
+/// INST ..., TeeReg, ... // Insert
+/// INST ..., Reg, ...
+/// INST ..., Reg, ...
+///
+/// with DefReg and TeeReg stackified. This eliminates a get_local from the
+/// resulting code.
+static MachineInstr *MoveAndTeeForMultiUse(
+ unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
+ MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
+ MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
+ DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
+
+ // Move Def into place.
+ MBB.splice(Insert, &MBB, Def);
+ LIS.handleMove(*Def);
+
+ // Create the Tee and attach the registers.
+ const auto *RegClass = MRI.getRegClass(Reg);
+ unsigned TeeReg = MRI.createVirtualRegister(RegClass);
+ unsigned DefReg = MRI.createVirtualRegister(RegClass);
+ MachineOperand &DefMO = Def->getOperand(0);
+ MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
+ TII->get(GetTeeLocalOpcode(RegClass)), TeeReg)
+ .addReg(Reg, RegState::Define)
+ .addReg(DefReg, getUndefRegState(DefMO.isDead()));
+ Op.setReg(TeeReg);
+ DefMO.setReg(DefReg);
+ SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
+ SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
+
+ // Tell LiveIntervals we moved the original vreg def from Def to Tee.
+ LiveInterval &LI = LIS.getInterval(Reg);
+ LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
+ VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
+ I->start = TeeIdx;
+ ValNo->def = TeeIdx;
+ ShrinkToUses(LI, LIS);
+
+ // Finish stackifying the new regs.
+ LIS.createAndComputeVirtRegInterval(TeeReg);
+ LIS.createAndComputeVirtRegInterval(DefReg);
+ MFI.stackifyVReg(DefReg);
+ MFI.stackifyVReg(TeeReg);
+ ImposeStackOrdering(Def);
+ ImposeStackOrdering(Tee);
+
+ DEBUG(dbgs() << " - Replaced register: "; Def->dump());
+ DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
+ return Def;
+}
+
+namespace {
+/// A stack for walking the tree of instructions being built, visiting the
+/// MachineOperands in DFS order.
+class TreeWalkerState {
+ typedef MachineInstr::mop_iterator mop_iterator;
+ typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
+ typedef iterator_range<mop_reverse_iterator> RangeTy;
+ SmallVector<RangeTy, 4> Worklist;
+
+public:
+ explicit TreeWalkerState(MachineInstr *Insert) {
+ const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
+ if (Range.begin() != Range.end())
+ Worklist.push_back(reverse(Range));
+ }
+
+ bool Done() const { return Worklist.empty(); }
+
+ MachineOperand &Pop() {
+ RangeTy &Range = Worklist.back();
+ MachineOperand &Op = *Range.begin();
+ Range = drop_begin(Range, 1);
+ if (Range.begin() == Range.end())
+ Worklist.pop_back();
+ assert((Worklist.empty() ||
+ Worklist.back().begin() != Worklist.back().end()) &&
+ "Empty ranges shouldn't remain in the worklist");
+ return Op;
+ }
+
+ /// Push Instr's operands onto the stack to be visited.
+ void PushOperands(MachineInstr *Instr) {
+ const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
+ if (Range.begin() != Range.end())
+ Worklist.push_back(reverse(Range));
+ }
+
+ /// Some of Instr's operands are on the top of the stack; remove them and
+ /// re-insert them starting from the beginning (because we've commuted them).
+ void ResetTopOperands(MachineInstr *Instr) {
+ assert(HasRemainingOperands(Instr) &&
+ "Reseting operands should only be done when the instruction has "
+ "an operand still on the stack");
+ Worklist.back() = reverse(Instr->explicit_uses());
+ }
+
+ /// Test whether Instr has operands remaining to be visited at the top of
+ /// the stack.
+ bool HasRemainingOperands(const MachineInstr *Instr) const {
+ if (Worklist.empty())
+ return false;
+ const RangeTy &Range = Worklist.back();
+ return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
+ }
+
+ /// Test whether the given register is present on the stack, indicating an
+ /// operand in the tree that we haven't visited yet. Moving a definition of
+ /// Reg to a point in the tree after that would change its value.
+ ///
+ /// This is needed as a consequence of using implicit get_locals for
+ /// uses and implicit set_locals for defs.
+ bool IsOnStack(unsigned Reg) const {
+ for (const RangeTy &Range : Worklist)
+ for (const MachineOperand &MO : Range)
+ if (MO.isReg() && MO.getReg() == Reg)
+ return true;
+ return false;
+ }
+};
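// A hedged, self-contained sketch of the traversal pattern TreeWalkerState
// implements (plain C++ with a hypothetical Node type, not the LLVM types
// above): keep a stack of reversed ranges, pop one element at a time, and
// push a popped element's own children to go depth-first.

#include <utility>
#include <vector>

struct Node { int Value; std::vector<Node> Children; };

static void VisitReverseDFS(const Node &Root, std::vector<int> &Order) {
  using RIt = std::vector<Node>::const_reverse_iterator;
  std::vector<std::pair<RIt, RIt>> Worklist;
  if (!Root.Children.empty())
    Worklist.push_back({Root.Children.rbegin(), Root.Children.rend()});
  while (!Worklist.empty()) {
    auto &Range = Worklist.back();
    const Node &N = *Range.first++;   // mirror of Pop()
    if (Range.first == Range.second)
      Worklist.pop_back();            // drop exhausted ranges eagerly
    Order.push_back(N.Value);
    if (!N.Children.empty())          // mirror of PushOperands()
      Worklist.push_back({N.Children.rbegin(), N.Children.rend()});
  }
}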
+
+/// State to keep track of whether commuting is in flight or whether it's been
+/// tried for the current instruction and didn't work.
+class CommutingState {
+ /// There are effectively three states: the initial state where we haven't
+ /// started commuting anything and we don't know anything yet, the tentative
+ /// state where we've commuted the operands of the current instruction and are
+ /// revisiting it, and the declined state where we've reverted the operands
+ /// back to their original order and will no longer commute it further.
+ bool TentativelyCommuting;
+ bool Declined;
+
+ /// During the tentative state, these hold the operand indices of the commuted
+ /// operands.
+ unsigned Operand0, Operand1;
+
+public:
+ CommutingState() : TentativelyCommuting(false), Declined(false) {}
+
+ /// Stackification for an operand was not successful due to ordering
+ /// constraints. If possible, and if we haven't already tried it and declined
+ /// it, commute Insert's operands and prepare to revisit it.
+ void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
+ const WebAssemblyInstrInfo *TII) {
+ if (TentativelyCommuting) {
+ assert(!Declined &&
+ "Don't decline commuting until you've finished trying it");
+ // Commuting didn't help. Revert it.
+ TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
+ TentativelyCommuting = false;
+ Declined = true;
+ } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
+ Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
+ Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
+ if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
+ // Tentatively commute the operands and try again.
+ TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
+ TreeWalker.ResetTopOperands(Insert);
+ TentativelyCommuting = true;
+ Declined = false;
+ }
+ }
+ }
+
+ /// Stackification for some operand was successful. Reset to the default
+ /// state.
+ void Reset() {
+ TentativelyCommuting = false;
+ Declined = false;
+ }
+};
+} // end anonymous namespace
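// Hedged sketch of how the two classes above cooperate in the main loop
// below; this only mirrors runOnMachineFunction, it is not additional code:
//
//   CommutingState Commuting;
//   TreeWalkerState TreeWalker(Insert);
//   while (!TreeWalker.Done()) {
//     MachineOperand &Op = TreeWalker.Pop();
//     if (/* Op was stackified */) {
//       Commuting.Reset();                 // success: forget commute state
//       TreeWalker.PushOperands(Insert);   // grow the tree downward
//     } else if (/* failed due to ordering, def in same block */) {
//       Commuting.MaybeCommute(Insert, TreeWalker, TII); // retry commuted
//     }
//   }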
+
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** Register Stackifying **********\n"
"********** Function: "
@@ -140,7 +708,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
MachineRegisterInfo &MRI = MF.getRegInfo();
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
LiveIntervals &LIS = getAnalysis<LiveIntervals>();
// Walk the instructions from the bottom up. Currently we don't look past
@@ -151,33 +722,37 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// iterating over it and the end iterator may change.
for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
MachineInstr *Insert = &*MII;
- // Don't nest anything inside a phi.
- if (Insert->getOpcode() == TargetOpcode::PHI)
- break;
-
// Don't nest anything inside an inline asm, because we don't have
// constraints for $push inputs.
if (Insert->getOpcode() == TargetOpcode::INLINEASM)
- break;
+ continue;
+
+ // Ignore debugging intrinsics.
+ if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
+ continue;
// Iterate through the inputs in reverse order, since we'll be pulling
// operands off the stack in LIFO order.
- bool AnyStackified = false;
- for (MachineOperand &Op : reverse(Insert->uses())) {
+ CommutingState Commuting;
+ TreeWalkerState TreeWalker(Insert);
+ while (!TreeWalker.Done()) {
+ MachineOperand &Op = TreeWalker.Pop();
+
// We're only interested in explicit virtual register operands.
- if (!Op.isReg() || Op.isImplicit() || !Op.isUse())
+ if (!Op.isReg())
continue;
unsigned Reg = Op.getReg();
-
- // Only consider registers with a single definition.
- // TODO: Eventually we may relax this, to stackify phi transfers.
- MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
- if (!Def)
+ assert(Op.isUse() && "explicit_uses() should only iterate over uses");
+ assert(!Op.isImplicit() &&
+ "explicit_uses() should only iterate over explicit operands");
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
- // There's no use in nesting implicit defs inside anything.
- if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+ // Identify the definition for this register at this point. Most
+ // registers are in SSA form here so we try a quick MRI query first.
+ MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
+ if (!Def)
continue;
// Don't nest an INLINE_ASM def into anything, because we don't have
@@ -185,10 +760,6 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
if (Def->getOpcode() == TargetOpcode::INLINEASM)
continue;
- // Don't nest PHIs inside of anything.
- if (Def->getOpcode() == TargetOpcode::PHI)
- continue;
-
// Argument instructions represent live-in registers and not real
// instructions.
if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
@@ -197,38 +768,53 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
Def->getOpcode() == WebAssembly::ARGUMENT_F64)
continue;
- // Single-use expression trees require defs that have one use.
- // TODO: Eventually we'll relax this, to take advantage of set_local
- // returning its result.
- if (!MRI.hasOneUse(Reg))
- continue;
-
- // For now, be conservative and don't look across block boundaries.
- // TODO: Be more aggressive?
- if (Def->getParent() != &MBB)
+ // Decide which strategy to take. Prefer to move a single-use value
+ // over cloning it, and prefer cloning over introducing a tee_local.
+ // For moving, we require the def to be in the same block as the use;
+ // this makes things simpler (LiveIntervals' handleMove function only
+ // supports intra-block moves) and it's MachineSink's job to catch all
+ // the sinking opportunities anyway.
+ bool SameBlock = Def->getParent() == &MBB;
+ bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) &&
+ !TreeWalker.IsOnStack(Reg);
+ if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
+ Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
+ } else if (ShouldRematerialize(*Def, AA, TII)) {
+ Insert =
+ RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
+ LIS, MFI, MRI, TII, TRI);
+ } else if (CanMove &&
+ OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
+ Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
+ MRI, TII);
+ } else {
+ // We failed to stackify the operand. If the problem was ordering
+ // constraints, Commuting may be able to help.
+ if (!CanMove && SameBlock)
+ Commuting.MaybeCommute(Insert, TreeWalker, TII);
+ // Proceed to the next operand.
continue;
+ }
- // Don't move instructions that have side effects or memory dependencies
- // or other complications.
- if (!IsSafeToMove(Def, Insert, AA, LIS, MRI))
- continue;
+ // We stackified an operand. Add the defining instruction's operands to
+ // the worklist stack now to continue to build an ever deeper tree.
+ Commuting.Reset();
+ TreeWalker.PushOperands(Insert);
+ }
+ // If we stackified any operands, skip over the tree to start looking for
+ // the next instruction we can build a tree on.
+ if (Insert != &*MII) {
+ ImposeStackOrdering(&*MII);
+ MII = std::prev(
+ llvm::make_reverse_iterator(MachineBasicBlock::iterator(Insert)));
Changed = true;
- AnyStackified = true;
- // Move the def down and nest it in the current instruction.
- MBB.splice(Insert, &MBB, Def);
- LIS.handleMove(Def);
- MFI.stackifyVReg(Reg);
- ImposeStackOrdering(Def);
- Insert = Def;
}
- if (AnyStackified)
- ImposeStackOrdering(&*MII);
}
}
- // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere
- // so that it never looks like a use-before-def.
+ // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere so
+ // that it never looks like a use-before-def.
if (Changed) {
MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
for (MachineBasicBlock &MBB : MF)
@@ -236,30 +822,30 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
}
#ifndef NDEBUG
- // Verify that pushes and pops are performed in FIFO order.
+ // Verify that pushes and pops are performed in LIFO order.
SmallVector<unsigned, 0> Stack;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
+ if (MI.isDebugValue())
+ continue;
for (MachineOperand &MO : reverse(MI.explicit_operands())) {
if (!MO.isReg())
continue;
- unsigned VReg = MO.getReg();
-
- // Don't stackify physregs like SP or FP.
- if (!TargetRegisterInfo::isVirtualRegister(VReg))
- continue;
+ unsigned Reg = MO.getReg();
- if (MFI.isVRegStackified(VReg)) {
+ if (MFI.isVRegStackified(Reg)) {
if (MO.isDef())
- Stack.push_back(VReg);
+ Stack.push_back(Reg);
else
- assert(Stack.pop_back_val() == VReg);
+ assert(Stack.pop_back_val() == Reg &&
+ "Register stack pop should be paired with a push");
}
}
}
// TODO: Generalize this code to support keeping values on the stack across
// basic block boundaries.
- assert(Stack.empty());
+ assert(Stack.empty() &&
+ "Register stack pushes and pops should be balanced");
}
#endif