Diffstat (limited to 'contrib/llvm/lib/CodeGen'): 201 files changed, 25562 insertions, 12616 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 4060db7..a736884 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -180,7 +180,7 @@ void AggressiveAntiDepBreaker::FinishBlock() { State = nullptr; } -void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, +void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count, unsigned InsertPosIndex) { assert(Count < InsertPosIndex && "Instruction index out of expected range!"); @@ -190,7 +190,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, ScanInstruction(MI, Count); DEBUG(dbgs() << "Observe: "); - DEBUG(MI->dump()); + DEBUG(MI.dump()); DEBUG(dbgs() << "\tRegs:"); std::vector<unsigned> &DefIndices = State->GetDefIndices(); @@ -214,9 +214,8 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, DEBUG(dbgs() << '\n'); } -bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, - MachineOperand& MO) -{ +bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI, + MachineOperand &MO) { if (!MO.isReg() || !MO.isImplicit()) return false; @@ -226,19 +225,19 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, MachineOperand *Op = nullptr; if (MO.isDef()) - Op = MI->findRegisterUseOperand(Reg, true); + Op = MI.findRegisterUseOperand(Reg, true); else - Op = MI->findRegisterDefOperand(Reg); + Op = MI.findRegisterDefOperand(Reg); return(Op && Op->isImplicit()); } -void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, - std::set<unsigned>& PassthruRegs) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); +void AggressiveAntiDepBreaker::GetPassthruRegs( + MachineInstr &MI, std::set<unsigned> &PassthruRegs) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; - if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) || + if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) || IsImplicitDefUse(MI, MO)) { const unsigned Reg = MO.getReg(); for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); @@ -313,28 +312,30 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DEBUG(if (header) { dbgs() << header << TRI->getName(Reg); header = nullptr; }); DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); - } - // Repeat for subregisters. - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - unsigned SubregReg = *SubRegs; - if (!State->IsLive(SubregReg)) { - KillIndices[SubregReg] = KillIdx; - DefIndices[SubregReg] = ~0u; - RegRefs.erase(SubregReg); - State->LeaveGroup(SubregReg); - DEBUG(if (header) { - dbgs() << header << TRI->getName(Reg); header = nullptr; }); - DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" << - State->GetGroup(SubregReg) << tag); + // Repeat for subregisters. Note that we only do this if the superregister + // was not live because otherwise, regardless whether we have an explicit + // use of the subregister, the subregister's contents are needed for the + // uses of the superregister. 
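The comment above states the invariant behind the restructured HandleLastUse: sub-registers are only retired when the super-register itself was not live, because a live super-register still needs the sub-registers' contents regardless of explicit uses. A minimal self-contained model of that rule, with plain STL containers standing in for the LLVM state (all names hypothetical):

```cpp
#include <cstdio>
#include <map>
#include <set>
#include <vector>

struct LivenessModel {
  std::set<unsigned> Live;                       // registers currently live
  std::map<unsigned, std::vector<unsigned>> Sub; // reg -> sub-registers
  std::map<unsigned, unsigned> KillIndex;        // reg -> last-use index

  void handleLastUse(unsigned Reg, unsigned KillIdx) {
    if (Live.count(Reg))
      return; // Reg stays live: its sub-registers' bits are still needed
    KillIndex[Reg] = KillIdx;
    // Retire sub-registers only on this path, mirroring the hunk above.
    for (unsigned SubReg : Sub[Reg])
      if (!Live.count(SubReg))
        KillIndex[SubReg] = KillIdx;
  }
};

int main() {
  LivenessModel M;
  M.Sub[1] = {2, 3}; // r1 has sub-registers r2 and r3
  M.handleLastUse(1, 42);
  for (const auto &KV : M.KillIndex)
    std::printf("r%u retired at %u\n", KV.first, KV.second);
}
```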
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubregReg = *SubRegs; + if (!State->IsLive(SubregReg)) { + KillIndices[SubregReg] = KillIdx; + DefIndices[SubregReg] = ~0u; + RegRefs.erase(SubregReg); + State->LeaveGroup(SubregReg); + DEBUG(if (header) { + dbgs() << header << TRI->getName(Reg); header = nullptr; }); + DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" << + State->GetGroup(SubregReg) << tag); + } } } DEBUG(if (!header && footer) dbgs() << footer); } -void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, - unsigned Count, - std::set<unsigned>& PassthruRegs) { +void AggressiveAntiDepBreaker::PrescanInstruction( + MachineInstr &MI, unsigned Count, std::set<unsigned> &PassthruRegs) { std::vector<unsigned> &DefIndices = State->GetDefIndices(); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& RegRefs = State->GetRegRefs(); @@ -344,8 +345,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // dead, or because only a subregister is live at the def. If we // don't do this the dead def will be incorrectly merged into the // previous def. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -354,8 +355,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, } DEBUG(dbgs() << "\tDef Groups:"); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -367,8 +368,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // defined in a call must not be changed (ABI). Inline assembly may // reference either system calls or the register directly. Skip it until we // can tell user specified registers from compiler-specified. - if (MI->isCall() || MI->hasExtraDefRegAllocReq() || - TII->isPredicated(MI) || MI->isInlineAsm()) { + if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI) || + MI.isInlineAsm()) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -386,8 +387,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // Note register reference... const TargetRegisterClass *RC = nullptr; - if (i < MI->getDesc().getNumOperands()) - RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + if (i < MI.getDesc().getNumOperands()) + RC = TII->getRegClass(MI.getDesc(), i, TRI, MF); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.insert(std::make_pair(Reg, RR)); } @@ -396,13 +397,13 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // Scan the register defs for this instruction and update // live-ranges. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; // Ignore KILLs and passthru registers for liveness... 
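Several hunks above call State->UnionGroups and State->GetGroup: the breaker tracks sets of registers that must be renamed together as numbered groups, with group 0 reserved for registers that must not be renamed (calls, inline asm, ABI constraints). A rough sketch of such a grouping structure as a plain union-find; the real AggressiveAntiDepState keeps its own group-node arrays, so treat this only as a model:

```cpp
#include <algorithm>
#include <numeric>
#include <vector>

struct RegGroups {
  std::vector<unsigned> Parent;
  explicit RegGroups(unsigned NumRegs) : Parent(NumRegs) {
    std::iota(Parent.begin(), Parent.end(), 0u); // every reg in its own group
  }
  unsigned find(unsigned R) {
    while (Parent[R] != R)
      R = Parent[R] = Parent[Parent[R]]; // path halving
    return R;
  }
  void unite(unsigned A, unsigned B) {
    A = find(A);
    B = find(B);
    if (A != B) // keep the smaller root so group 0 stays group 0
      Parent[std::max(A, B)] = std::min(A, B);
  }
};

int main() {
  RegGroups G(16);
  G.unite(3, 7); // r3 and r7 must be renamed together
  G.unite(7, 0); // joining group 0 pins the whole group as un-renamable
  return G.find(3) == 0 ? 0 : 1;
}
```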
- if (MI->isKill() || (PassthruRegs.count(Reg) != 0)) + if (MI.isKill() || (PassthruRegs.count(Reg) != 0)) continue; // Update def for Reg and aliases. @@ -421,7 +422,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, } } -void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, +void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { DEBUG(dbgs() << "\tUse Groups:"); std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& @@ -444,14 +445,13 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // instruction which may not be executed. The second R6 def may or may not // re-define R6 so it's not safe to change it since the last R6 use cannot be // changed. - bool Special = MI->isCall() || - MI->hasExtraSrcRegAllocReq() || - TII->isPredicated(MI) || MI->isInlineAsm(); + bool Special = MI.isCall() || MI.hasExtraSrcRegAllocReq() || + TII->isPredicated(MI) || MI.isInlineAsm(); // Scan the register uses for this instruction and update // live-ranges, groups and RegRefs. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -471,8 +471,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Note register reference... const TargetRegisterClass *RC = nullptr; - if (i < MI->getDesc().getNumOperands()) - RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + if (i < MI.getDesc().getNumOperands()) + RC = TII->getRegClass(MI.getDesc(), i, TRI, MF); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; RegRefs.insert(std::make_pair(Reg, RR)); } @@ -481,12 +481,12 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Form a group of all defs and uses of a KILL instruction to ensure // that all registers are renamed as a group. - if (MI->isKill()) { + if (MI.isKill()) { DEBUG(dbgs() << "\tKill Group:"); unsigned FirstReg = 0; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -563,13 +563,16 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( if (RegRefs.count(Reg) > 0) { DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":"); - BitVector BV = GetRenameRegisters(Reg); - RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV)); + BitVector &BV = RenameRegisterMap[Reg]; + assert(BV.empty()); + BV = GetRenameRegisters(Reg); - DEBUG(dbgs() << " ::"); - DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r)) - dbgs() << " " << TRI->getName(r)); - DEBUG(dbgs() << "\n"); + DEBUG({ + dbgs() << " ::"; + for (int r = BV.find_first(); r != -1; r = BV.find_next(r)) + dbgs() << " " << TRI->getName(r); + dbgs() << "\n"; + }); } } @@ -650,8 +653,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( DEBUG(dbgs() << " " << TRI->getName(NewReg)); // Check if Reg can be renamed to NewReg. 
- BitVector BV = RenameRegisterMap[Reg]; - if (!BV.test(NewReg)) { + if (!RenameRegisterMap[Reg].test(NewReg)) { DEBUG(dbgs() << "(no rename)"); goto next_super_reg; } @@ -785,6 +787,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( DEBUG(dbgs() << '\n'); #endif + BitVector RegAliases(TRI->getNumRegs()); + // Attempt to break anti-dependence edges. Walk the instructions // from the bottom up, tracking information about liveness as we go // to help determine which registers are available. @@ -792,13 +796,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( unsigned Count = InsertPosIndex - 1; for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { - MachineInstr *MI = --I; + MachineInstr &MI = *--I; - if (MI->isDebugValue()) + if (MI.isDebugValue()) continue; DEBUG(dbgs() << "Anti: "); - DEBUG(MI->dump()); + DEBUG(MI.dump()); std::set<unsigned> PassthruRegs; GetPassthruRegs(MI, PassthruRegs); @@ -809,13 +813,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // The dependence edges that represent anti- and output- // dependencies that are candidates for breaking. std::vector<const SDep *> Edges; - const SUnit *PathSU = MISUnitMap[MI]; + const SUnit *PathSU = MISUnitMap[&MI]; AntiDepEdges(PathSU, Edges); // If MI is not on the critical path, then we don't rename // registers in the CriticalPathSet. BitVector *ExcludeRegs = nullptr; - if (MI == CriticalPathMI) { + if (&MI == CriticalPathMI) { CriticalPathSU = CriticalPathStep(CriticalPathSU); CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : nullptr; } else if (CriticalPathSet.any()) { @@ -824,7 +828,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Ignore KILL instructions (they form a group in ScanInstruction // but don't cause any anti-dependence breaking themselves) - if (!MI->isKill()) { + if (!MI.isKill()) { // Attempt to break each anti-dependency... for (unsigned i = 0, e = Edges.size(); i != e; ++i) { const SDep *Edge = Edges[i]; @@ -854,7 +858,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( continue; } else { // No anti-dep breaking for implicit deps - MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg); + MachineOperand *AntiDepOp = MI.findRegisterDefOperand(AntiDepReg); assert(AntiDepOp && "Can't find index for defined register operand"); if (!AntiDepOp || AntiDepOp->isImplicit()) { DEBUG(dbgs() << " (implicit)\n"); @@ -896,6 +900,29 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( } if (AntiDepReg == 0) continue; + + // If the definition of the anti-dependency register does not start + // a new live range, bail out. This can happen if the anti-dep + // register is a sub-register of another register whose live range + // spans over PathSU. In such case, PathSU defines only a part of + // the larger register. 
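The FindSuitableFreeRegisters hunks above stop copying BitVectors out of RenameRegisterMap: the insertion path takes a reference to the default-constructed map slot and fills it in place, and the later membership test queries the stored entry directly. The same pattern with std::map and std::vector<bool> standing in for the LLVM containers (names hypothetical):

```cpp
#include <cassert>
#include <map>
#include <vector>

using BitVector = std::vector<bool>;
static std::map<unsigned, BitVector> RenameRegisterMap;

static BitVector computeRenameRegisters(unsigned Reg) {
  BitVector BV(64);   // stand-in for GetRenameRegisters(Reg)
  BV[Reg + 1] = true; // pretend Reg may be renamed to Reg + 1
  return BV;
}

static void noteRenameCandidates(unsigned Reg) {
  BitVector &BV = RenameRegisterMap[Reg]; // reference to the map slot
  assert(BV.empty() && "register visited twice");
  BV = computeRenameRegisters(Reg); // filled in place, no copy back
}

static bool canRename(unsigned Reg, unsigned NewReg) {
  // Test the stored entry directly instead of copying it out first.
  const BitVector &BV = RenameRegisterMap[Reg];
  return NewReg < BV.size() && BV[NewReg];
}

int main() {
  noteRenameCandidates(5);
  assert(canRename(5, 6) && !canRename(5, 7));
}
```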
+ RegAliases.reset(); + for (MCRegAliasIterator AI(AntiDepReg, TRI, true); AI.isValid(); ++AI) + RegAliases.set(*AI); + for (SDep S : PathSU->Succs) { + SDep::Kind K = S.getKind(); + if (K != SDep::Data && K != SDep::Output && K != SDep::Anti) + continue; + unsigned R = S.getReg(); + if (!RegAliases[R]) + continue; + if (R == AntiDepReg || TRI->isSubRegister(AntiDepReg, R)) + continue; + AntiDepReg = 0; + break; + } + + if (AntiDepReg == 0) continue; } assert(AntiDepReg != 0); @@ -938,7 +965,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( for (DbgValueVector::iterator DVI = DbgValues.begin(), DVE = DbgValues.end(); DVI != DVE; ++DVI) if (DVI->second == Q.second.Operand->getParent()) - UpdateDbgValue(DVI->first, AntiDepReg, NewReg); + UpdateDbgValue(*DVI->first, AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h index eba7383..f97e666 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -144,7 +144,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { /// Update liveness information to account for the current /// instruction, which will not be scheduled. /// - void Observe(MachineInstr *MI, unsigned Count, + void Observe(MachineInstr &MI, unsigned Count, unsigned InsertPosIndex) override; /// Finish anti-dep breaking for a basic block. @@ -156,19 +156,19 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { /// Return true if MO represents a register /// that is both implicitly used and defined in MI - bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO); + bool IsImplicitDefUse(MachineInstr &MI, MachineOperand &MO); /// If MI implicitly def/uses a register, then /// return that register and all subregisters. - void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs); + void GetPassthruRegs(MachineInstr &MI, std::set<unsigned> &PassthruRegs); void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag, const char *header = nullptr, const char *footer = nullptr); - void PrescanInstruction(MachineInstr *MI, unsigned Count, - std::set<unsigned>& PassthruRegs); - void ScanInstruction(MachineInstr *MI, unsigned Count); + void PrescanInstruction(MachineInstr &MI, unsigned Count, + std::set<unsigned> &PassthruRegs); + void ScanInstruction(MachineInstr &MI, unsigned Count); BitVector GetRenameRegisters(unsigned Reg); bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex, RenameOrderType& RenameOrder, diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 75579a2..d690734 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -15,7 +15,6 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -624,7 +623,9 @@ bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) { if (!GV->hasLinkOnceODRLinkage()) return false; - if (GV->hasUnnamedAddr()) + // We assume that anyone who sets global unnamed_addr on a non-constant knows + // what they're doing. 
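The new bail-out above collects every register aliasing AntiDepReg into a BitVector, then scans PathSU's Data/Output/Anti successors: if any of them depends on an aliasing register that is neither AntiDepReg itself nor one of its sub-registers, the def does not start a fresh live range and the dependency is left alone. A toy model of that filter (hypothetical Dep type and a made-up sub-register encoding):

```cpp
#include <set>
#include <vector>

struct Dep { unsigned Reg; }; // one Data/Output/Anti successor dependence

// Stand-in for TargetRegisterInfo::isSubRegister (toy encoding).
static bool isSubRegister(unsigned Super, unsigned Sub) {
  return Sub == Super + 1;
}

static unsigned filterAntiDepReg(unsigned AntiDepReg,
                                 const std::set<unsigned> &Aliases,
                                 const std::vector<Dep> &SuccDeps) {
  for (const Dep &D : SuccDeps) {
    if (!Aliases.count(D.Reg))
      continue; // unrelated register
    if (D.Reg == AntiDepReg || isSubRegister(AntiDepReg, D.Reg))
      continue; // covered by the rename itself
    return 0;   // a wider aliasing register lives across this def: give up
  }
  return AntiDepReg;
}

int main() {
  // A successor depends on r20, which aliases r10 without being a
  // sub-register of it, so r10 must not be renamed.
  return filterAntiDepReg(10, {10, 11, 20}, {{20}}) == 0 ? 0 : 1;
}
```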
+ if (GV->hasGlobalUnnamedAddr()) return true; // If it is a non constant variable, it needs to be uniqued across shared @@ -634,47 +635,36 @@ bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) { return false; } - // An alias can point to a variable. We could try to resolve the alias to - // decide, but for now just don't hide them. - if (isa<GlobalAlias>(GV)) - return false; - - GlobalStatus GS; - if (GlobalStatus::analyzeGlobal(GV, GS)) - return false; - - return !GS.IsCompared; + return GV->hasAtLeastLocalUnnamedAddr(); } static void collectFuncletMembers( DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet, const MachineBasicBlock *MBB) { - // Add this MBB to our funclet. - auto P = FuncletMembership.insert(std::make_pair(MBB, Funclet)); + SmallVector<const MachineBasicBlock *, 16> Worklist = {MBB}; + while (!Worklist.empty()) { + const MachineBasicBlock *Visiting = Worklist.pop_back_val(); + // Don't follow blocks which start new funclets. + if (Visiting->isEHPad() && Visiting != MBB) + continue; - // Don't revisit blocks. - if (!P.second) { - assert(P.first->second == Funclet && "MBB is part of two funclets!"); - return; - } + // Add this MBB to our funclet. + auto P = FuncletMembership.insert(std::make_pair(Visiting, Funclet)); - bool IsReturn = false; - int NumTerminators = 0; - for (const MachineInstr &MI : MBB->terminators()) { - IsReturn |= MI.isReturn(); - ++NumTerminators; - } - assert((!IsReturn || NumTerminators == 1) && - "Expected only one terminator when a return is present!"); + // Don't revisit blocks. + if (!P.second) { + assert(P.first->second == Funclet && "MBB is part of two funclets!"); + continue; + } - // Returns are boundaries where funclet transfer can occur, don't follow - // successors. - if (IsReturn) - return; + // Returns are boundaries where funclet transfer can occur, don't follow + // successors. + if (Visiting->isReturnBlock()) + continue; - for (const MachineBasicBlock *SMBB : MBB->successors()) - if (!SMBB->isEHPad()) - collectFuncletMembers(FuncletMembership, Funclet, SMBB); + for (const MachineBasicBlock *Succ : Visiting->successors()) + Worklist.push_back(Succ); + } } DenseMap<const MachineBasicBlock *, int> diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h index 9f05200..04f7f41 100644 --- a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h @@ -47,18 +47,18 @@ public: /// Update liveness information to account for the current /// instruction, which will not be scheduled. - virtual void Observe(MachineInstr *MI, unsigned Count, - unsigned InsertPosIndex) =0; - + virtual void Observe(MachineInstr &MI, unsigned Count, + unsigned InsertPosIndex) = 0; + /// Finish anti-dep breaking for a basic block. virtual void FinishBlock() =0; /// Update DBG_VALUE if dependency breaker is updating /// other machine instruction to use NewReg. 
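collectFuncletMembers above trades recursion for an explicit worklist: seed one block, follow successors, skip blocks that open a different funclet (EH pads other than the seed), and stop at return blocks, which are funclet-transfer boundaries. The same shape on a toy CFG (hypothetical Block type):

```cpp
#include <map>
#include <vector>

struct Block {
  bool IsEHPad = false;
  bool IsReturn = false;
  std::vector<const Block *> Succs;
};

static void collectMembers(std::map<const Block *, int> &Membership,
                           int Funclet, const Block *Entry) {
  std::vector<const Block *> Worklist = {Entry};
  while (!Worklist.empty()) {
    const Block *Visiting = Worklist.back();
    Worklist.pop_back();
    if (Visiting->IsEHPad && Visiting != Entry)
      continue; // starts a different funclet
    if (!Membership.insert({Visiting, Funclet}).second)
      continue; // already a member; don't revisit
    if (Visiting->IsReturn)
      continue; // funclet transfer boundary; don't follow successors
    for (const Block *S : Visiting->Succs)
      Worklist.push_back(S);
  }
}

int main() {
  Block A, B, R;
  R.IsReturn = true;
  A.Succs = {&B, &R};
  std::map<const Block *, int> M;
  collectMembers(M, 0, &A);
  return M.size() == 3 ? 0 : 1;
}
```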
- void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) { - assert (MI->isDebugValue() && "MI is not DBG_VALUE!"); - if (MI && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg) - MI->getOperand(0).setReg(NewReg); + void UpdateDbgValue(MachineInstr &MI, unsigned OldReg, unsigned NewReg) { + assert(MI.isDebugValue() && "MI is not DBG_VALUE!"); + if (MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == OldReg) + MI.getOperand(0).setReg(NewReg); } }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index ade2d71..5294c98 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -28,7 +27,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetFrameLowering.h" diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h index 211fc98..ba3e3b7 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -11,10 +11,10 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H #include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCSymbol.h" namespace llvm { class MCSection; -class MCSymbol; class AsmPrinter; // Collection of addresses for this unit and assorted labels. // A Symbol->unsigned mapping of addresses used by indirect diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 5f67d3d..272bace 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -12,11 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/AsmPrinter.h" +#include "CodeViewDebug.h" #include "DwarfDebug.h" #include "DwarfException.h" #include "WinException.h" -#include "WinCodeViewLineTables.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/CodeGen/Analysis.h" @@ -125,6 +124,10 @@ AsmPrinter::~AsmPrinter() { } } +bool AsmPrinter::isPositionIndependent() const { + return TM.isPositionIndependent(); +} + /// getFunctionNumber - Return a unique ID for the current function. 
/// unsigned AsmPrinter::getFunctionNumber() const { @@ -248,12 +251,13 @@ bool AsmPrinter::doInitialization(Module &M) { if (MAI->doesSupportDebugInformation()) { bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) { - Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this), + Handlers.push_back(HandlerInfo(new CodeViewDebug(this), DbgTimerName, CodeViewLineTablesGroupName)); } if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) { DD = new DwarfDebug(this, &M); + DD->beginModule(); Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); } } @@ -319,21 +323,17 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak); } return; - case GlobalValue::AppendingLinkage: - // FIXME: appending linkage variables should go into a section of - // their name or something. For now, just emit them as external. case GlobalValue::ExternalLinkage: - // If external or appending, declare as a global symbol. - // .globl _foo + // If external, declare as a global symbol: .globl _foo OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); return; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: return; + case GlobalValue::AppendingLinkage: case GlobalValue::AvailableExternallyLinkage: - llvm_unreachable("Should never emit this"); case GlobalValue::ExternalWeakLinkage: - llvm_unreachable("Don't know how to emit these"); + llvm_unreachable("Should never emit this"); } llvm_unreachable("Unknown linkage type!"); } @@ -347,51 +347,17 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { return TM.getSymbol(GV, *Mang); } -static MCSymbol *getOrCreateEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) { - return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName()); -} - -static MCSymbol *getOrCreateEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) { - return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName()); -} - -/// EmitEmulatedTLSControlVariable - Emit the control variable for an emulated TLS variable. -void AsmPrinter::EmitEmulatedTLSControlVariable(const GlobalVariable *GV, - MCSymbol *EmittedSym, - bool AllZeroInitValue) { - MCSection *TLSVarSection = getObjFileLowering().getDataSection(); - OutStreamer->SwitchSection(TLSVarSection); - MCSymbol *GVSym = getSymbol(GV); - EmitLinkage(GV, EmittedSym); // same linkage as GV - const DataLayout &DL = GV->getParent()->getDataLayout(); - uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); - unsigned AlignLog = getGVAlignmentLog2(GV, DL); - unsigned WordSize = DL.getPointerSize(); - unsigned Alignment = DL.getPointerABIAlignment(); - EmitAlignment(Log2_32(Alignment)); - OutStreamer->EmitLabel(EmittedSym); - OutStreamer->EmitIntValue(Size, WordSize); - OutStreamer->EmitIntValue((1 << AlignLog), WordSize); - OutStreamer->EmitIntValue(0, WordSize); - if (GV->hasInitializer() && !AllZeroInitValue) { - OutStreamer->EmitSymbolValue( - getOrCreateEmuTLSInitSym(GVSym, OutContext), WordSize); - } else - OutStreamer->EmitIntValue(0, WordSize); - if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedSym), - MCConstantExpr::create(4 * WordSize, OutContext)); - OutStreamer->AddBlankLine(); // End of the __emutls_v.* variable. -} - /// EmitGlobalVariable - Emit the specified global variable to the .s file. 
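For context on the emulated-TLS hunks: with EmulatedTLS the AsmPrinter no longer emits the variable itself at all; the `__emutls_v.<name>` control descriptor (and, when a non-zero initializer exists, the `__emutls_t.<name>` copy of the initial value) that the deleted helpers used to create are now produced earlier in lowering. The naming scheme, as a sketch:

```cpp
#include <string>

// Control variable: {size, alignment, reserved, pointer-to-init} descriptor.
static std::string emuTLSControlName(const std::string &Name) {
  return "__emutls_v." + Name;
}

// Initial-value image, only needed when the initializer is not all zero.
static std::string emuTLSInitName(const std::string &Name) {
  return "__emutls_t." + Name;
}

int main() {
  return emuTLSControlName("xyz") == "__emutls_v.xyz" &&
                 emuTLSInitName("xyz") == "__emutls_t.xyz"
             ? 0
             : 1;
}
```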
void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { - bool IsEmuTLSVar = - GV->getThreadLocalMode() != llvm::GlobalVariable::NotThreadLocal && - TM.Options.EmulatedTLS; + bool IsEmuTLSVar = TM.Options.EmulatedTLS && GV->isThreadLocal(); assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) && "No emulated TLS variables in the common section"); + // Never emit TLS variable xyz in emulated TLS model. + // The initialization value is in __emutls_t.xyz instead of xyz. + if (IsEmuTLSVar) + return; + if (GV->hasInitializer()) { // Check to see if this is a special global used by LLVM, if so, emit it. if (EmitSpecialLLVMGlobal(GV)) @@ -402,7 +368,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GlobalGOTEquivs.count(getSymbol(GV))) return; - if (isVerbose() && !IsEmuTLSVar) { + if (isVerbose()) { // When printing the control variable __emutls_v.*, // we don't need to print the original TLS variable name. GV->printAsOperand(OutStreamer->GetCommentOS(), @@ -412,11 +378,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } MCSymbol *GVSym = getSymbol(GV); - MCSymbol *EmittedSym = IsEmuTLSVar ? - getOrCreateEmuTLSControlSym(GVSym, OutContext) : GVSym; - // getOrCreateEmuTLSControlSym only creates the symbol with name and default attributes. - // GV's or GVSym's attributes will be used for the EmittedSym. + MCSymbol *EmittedSym = GVSym; + // getOrCreateEmuTLSControlSym only creates the symbol with name and default + // attributes. + // GV's or GVSym's attributes will be used for the EmittedSym. EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. @@ -440,48 +406,47 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // sections and expected to be contiguous (e.g. ObjC metadata). unsigned AlignLog = getGVAlignmentLog2(GV, DL); - bool AllZeroInitValue = false; - const Constant *InitValue = GV->getInitializer(); - if (isa<ConstantAggregateZero>(InitValue)) - AllZeroInitValue = true; - else { - const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue); - if (InitIntValue && InitIntValue->isZero()) - AllZeroInitValue = true; - } - if (IsEmuTLSVar) - EmitEmulatedTLSControlVariable(GV, EmittedSym, AllZeroInitValue); - for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); HI.Handler->setSymbolSize(GVSym, Size); } - // Handle common and BSS local symbols (.lcomm). - if (GVKind.isCommon() || GVKind.isBSSLocal()) { - assert(!(IsEmuTLSVar && GVKind.isCommon()) && - "No emulated TLS variables in the common section"); + // Handle common symbols + if (GVKind.isCommon()) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. unsigned Align = 1 << AlignLog; + if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) + Align = 0; - // Handle common symbols. - if (GVKind.isCommon()) { - if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) - Align = 0; + // .comm _foo, 42, 4 + OutStreamer->EmitCommonSymbol(GVSym, Size, Align); + return; + } - // .comm _foo, 42, 4 - OutStreamer->EmitCommonSymbol(GVSym, Size, Align); - return; - } + // Determine to which section this global should be emitted. + MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); - // Handle local BSS symbols. 
- if (MAI->hasMachoZeroFillDirective()) { - MCSection *TheSection = - getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); - // .zerofill __DATA, __bss, _foo, 400, 5 - OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align); - return; - } + // If we have a bss global going to a section that supports the + // zerofill directive, do so here. + if (GVKind.isBSS() && MAI->hasMachoZeroFillDirective() && + TheSection->isVirtualSection()) { + if (Size == 0) + Size = 1; // zerofill of 0 bytes is undefined. + unsigned Align = 1 << AlignLog; + EmitLinkage(GV, GVSym); + // .zerofill __DATA, __bss, _foo, 400, 5 + OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align); + return; + } + + // If this is a BSS local symbol and we are emitting in the BSS + // section use .lcomm/.comm directive. + if (GVKind.isBSSLocal() && + getObjFileLowering().getBSSSection() == TheSection) { + if (Size == 0) + Size = 1; // .comm Foo, 0 is undefined, avoid it. + unsigned Align = 1 << AlignLog; // Use .lcomm only if it supports user-specified alignment. // Otherwise, while it would still be correct to use .lcomm in some @@ -505,30 +470,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; } - if (IsEmuTLSVar && AllZeroInitValue) - return; // No need of initialization values. - - MCSymbol *EmittedInitSym = IsEmuTLSVar ? - getOrCreateEmuTLSInitSym(GVSym, OutContext) : GVSym; - // getOrCreateEmuTLSInitSym only creates the symbol with name and default attributes. - // GV's or GVSym's attributes will be used for the EmittedInitSym. - - MCSection *TheSection = IsEmuTLSVar ? - getObjFileLowering().getReadOnlySection() : - getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); - - // Handle the zerofill directive on darwin, which is a special form of BSS - // emission. - if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) { - if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. - - // .globl _foo - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); - // .zerofill __DATA, __common, _foo, 400, 5 - OutStreamer->EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); - return; - } - // Handle thread local data for mach-o which requires us to output an // additional structure of data and mangle the original symbol so that we // can reference it later. @@ -539,7 +480,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // TLOF class. This will also make it more obvious that stuff like // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho // specific code. - if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) { + if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { // Emit the .tbss symbol MCSymbol *MangSym = OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); @@ -581,11 +522,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; } + MCSymbol *EmittedInitSym = GVSym; + OutStreamer->SwitchSection(TheSection); - // emutls_t.* symbols are only used in the current compilation unit. - if (!IsEmuTLSVar) - EmitLinkage(GV, EmittedInitSym); + EmitLinkage(GV, EmittedInitSym); EmitAlignment(AlignLog, GV); OutStreamer->EmitLabel(EmittedInitSym); @@ -696,20 +637,20 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { // We assume a single instruction only has a spill or reload, not // both. 
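The restructured EmitGlobalVariable above resolves the target section first and only then picks an emission strategy: common symbols use .comm, BSS globals headed for a zerofill-capable virtual section use .zerofill, BSS locals landing in the BSS section use .lcomm/.comm, and everything else falls through to ordinary data emission. A condensed sketch of that ladder (hypothetical flag struct; the real checks consult MCAsmInfo and the section objects):

```cpp
#include <cstdio>

struct GVInfo {
  bool IsCommon, IsBSS, IsBSSLocal;
  bool SectionIsVirtual, HasMachoZerofill, InBSSSection;
};

static const char *chooseDirective(const GVInfo &GV) {
  if (GV.IsCommon)
    return ".comm";
  if (GV.IsBSS && GV.HasMachoZerofill && GV.SectionIsVirtual)
    return ".zerofill";
  if (GV.IsBSSLocal && GV.InBSSSection)
    return ".lcomm";
  return "label + data"; // ordinary emission into the chosen section
}

int main() {
  GVInfo BSSGlobal = {false, true, false, true, true, false};
  std::printf("%s\n", chooseDirective(BSSGlobal)); // prints .zerofill
}
```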
const MachineMemOperand *MMO; - if (TII->isLoadFromStackSlotPostFE(&MI, FI)) { + if (TII->isLoadFromStackSlotPostFE(MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Reload\n"; } - } else if (TII->hasLoadFromStackSlot(&MI, MMO, FI)) { + } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Reload\n"; - } else if (TII->isStoreToStackSlotPostFE(&MI, FI)) { + } else if (TII->isStoreToStackSlotPostFE(MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Spill\n"; } - } else if (TII->hasStoreToStackSlot(&MI, MMO, FI)) { + } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Spill\n"; } @@ -745,7 +686,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { AP.MF->getSubtarget().getRegisterInfo()) << (Op.isDef() ? "<def>" : "<kill>"); } - AP.OutStreamer->AddComment(Str); + AP.OutStreamer->AddComment(OS.str()); AP.OutStreamer->AddBlankLine(); } @@ -1065,8 +1006,9 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV, // Global GOT equivalents are unnamed private globals with a constant // pointer initializer to another global symbol. They must point to a // GlobalVariable or Function, i.e., as GlobalValue. - if (!GV->hasUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() || - !GV->isDiscardableIfUnused() || !dyn_cast<GlobalValue>(GV->getOperand(0))) + if (!GV->hasGlobalUnnamedAddr() || !GV->hasInitializer() || + !GV->isConstant() || !GV->isDiscardableIfUnused() || + !dyn_cast<GlobalValue>(GV->getOperand(0))) return false; // To be a got equivalent, at least one of its users need to be a constant @@ -1118,6 +1060,52 @@ void AsmPrinter::emitGlobalGOTEquivs() { EmitGlobalVariable(GV); } +void AsmPrinter::emitGlobalIndirectSymbol(Module &M, + const GlobalIndirectSymbol& GIS) { + MCSymbol *Name = getSymbol(&GIS); + + if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_Global); + else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference); + else + assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage"); + + // Set the symbol type to function if the alias has a function type. + // This affects codegen when the aliasee is not a function. + if (GIS.getType()->getPointerElementType()->isFunctionTy()) { + OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); + if (isa<GlobalIFunc>(GIS)) + OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction); + } + + EmitVisibility(Name, GIS.getVisibility()); + + const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol()); + + if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr)) + OutStreamer->EmitSymbolAttribute(Name, MCSA_AltEntry); + + // Emit the directives as assignments aka .set: + OutStreamer->EmitAssignment(Name, Expr); + + if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) { + // If the aliasee does not correspond to a symbol in the output, i.e. the + // alias is not of an object or the aliased object is private, then set the + // size of the alias symbol from the type of the alias. We don't do this in + // other situations as the alias and aliasee having differing types but same + // size may be intentional. 
+ const GlobalObject *BaseObject = GA->getBaseObject(); + if (MAI->hasDotTypeDotSizeDirective() && GA->getValueType()->isSized() && + (!BaseObject || BaseObject->hasPrivateLinkage())) { + const DataLayout &DL = M.getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GA->getValueType()); + OutStreamer->emitELFSize(cast<MCSymbolELF>(Name), + MCConstantExpr::create(Size, OutContext)); + } + } +} + bool AsmPrinter::doFinalization(Module &M) { // Set the MachineFunction to nullptr so that we can catch attempted // accesses to MF specific features at the module level and so that @@ -1191,55 +1179,35 @@ bool AsmPrinter::doFinalization(Module &M) { // to notice uses in operands (due to constant exprs etc). This should // happen with the MC stuff eventually. - // Print out module-level global variables here. - for (const auto &G : M.globals()) { - if (!G.hasExternalWeakLinkage()) + // Print out module-level global objects here. + for (const auto &GO : M.global_objects()) { + if (!GO.hasExternalWeakLinkage()) continue; - OutStreamer->EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference); - } - - for (const auto &F : M) { - if (!F.hasExternalWeakLinkage()) - continue; - OutStreamer->EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference); + OutStreamer->EmitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference); } } OutStreamer->AddBlankLine(); - for (const auto &Alias : M.aliases()) { - MCSymbol *Name = getSymbol(&Alias); - - if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective()) - OutStreamer->EmitSymbolAttribute(Name, MCSA_Global); - else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage()) - OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference); - else - assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); - - // Set the symbol type to function if the alias has a function type. - // This affects codegen when the aliasee is not a function. - if (Alias.getType()->getPointerElementType()->isFunctionTy()) - OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); - - EmitVisibility(Name, Alias.getVisibility()); - // Emit the directives as assignments aka .set: - OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee())); - - // If the aliasee does not correspond to a symbol in the output, i.e. the - // alias is not of an object or the aliased object is private, then set the - // size of the alias symbol from the type of the alias. We don't do this in - // other situations as the alias and aliasee having differing types but same - // size may be intentional. - const GlobalObject *BaseObject = Alias.getBaseObject(); - if (MAI->hasDotTypeDotSizeDirective() && Alias.getValueType()->isSized() && - (!BaseObject || BaseObject->hasPrivateLinkage())) { - const DataLayout &DL = M.getDataLayout(); - uint64_t Size = DL.getTypeAllocSize(Alias.getValueType()); - OutStreamer->emitELFSize(cast<MCSymbolELF>(Name), - MCConstantExpr::create(Size, OutContext)); + // Print aliases in topological order, that is, for each alias a = b, + // b must be printed before a. + // This is because on some targets (e.g. PowerPC) linker expects aliases in + // such an order to generate correct TOC information. 
+ SmallVector<const GlobalAlias *, 16> AliasStack; + SmallPtrSet<const GlobalAlias *, 16> AliasVisited; + for (const auto &Alias : M.aliases()) { + for (const GlobalAlias *Cur = &Alias; Cur; + Cur = dyn_cast<GlobalAlias>(Cur->getAliasee())) { + if (!AliasVisited.insert(Cur).second) + break; + AliasStack.push_back(Cur); } + for (const GlobalAlias *AncestorAlias : reverse(AliasStack)) + emitGlobalIndirectSymbol(M, *AncestorAlias); + AliasStack.clear(); } + for (const auto &IFunc : M.ifuncs()) + emitGlobalIndirectSymbol(M, IFunc); GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); @@ -1252,9 +1220,10 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit __morestack address if needed for indirect calls. if (MMI->usesMorestackAddr()) { + unsigned Align = 1; MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant( getDataLayout(), SectionKind::getReadOnly(), - /*C=*/nullptr); + /*C=*/nullptr, Align); OutStreamer->SwitchSection(ReadOnlySection); MCSymbol *AddrSymbol = @@ -1344,8 +1313,8 @@ void AsmPrinter::EmitConstantPool() { if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; - MCSection *S = - getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C); + MCSection *S = getObjFileLowering().getSectionForConstant(getDataLayout(), + Kind, C, Align); // The number of sections are small, just do a linear search from the // last section to the first. @@ -1443,7 +1412,7 @@ void AsmPrinter::EmitJumpTableInfo() { // For the EK_LabelDifference32 entry, if using .set avoids a relocation, /// emit a .set directive for each unique entry. if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && - MAI->doesSetDirectiveSuppressesReloc()) { + MAI->doesSetDirectiveSuppressReloc()) { SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); @@ -1524,7 +1493,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // If the .set directive avoids relocations, this is emitted as: // .set L4_5_set_123, LBB123 - LJTI1_2 // .word L4_5_set_123 - if (MAI->doesSetDirectiveSuppressesReloc()) { + if (MAI->doesSetDirectiveSuppressReloc()) { Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()), OutContext); break; @@ -1555,7 +1524,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } // Ignore debug and non-emitted data. This handles llvm.compiler.used. 
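The alias loop above enforces topological order: for a = b and b = c, the definition of b must already have been printed when a is emitted, which some linkers (the PowerPC TOC case mentioned in the comment) require. A self-contained model of the stack-and-visited-set walk, with std::map/std::set standing in for the LLVM containers:

```cpp
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // alias -> aliasee; "f" is a plain definition, not an alias.
  std::map<std::string, std::string> Aliasee = {
      {"a", "b"}, {"b", "c"}, {"c", "f"}};
  std::set<std::string> Visited;
  std::vector<std::string> Stack;
  for (const auto &KV : Aliasee) {
    // Push the chain rooted at this alias, stopping at non-aliases and at
    // anything already handled by an earlier chain.
    for (std::string Cur = KV.first;
         Aliasee.count(Cur) && Visited.insert(Cur).second;
         Cur = Aliasee[Cur])
      Stack.push_back(Cur);
    // Emit in reverse so each aliasee precedes the aliases pointing at it.
    for (auto It = Stack.rbegin(); It != Stack.rend(); ++It)
      std::printf(".set %s, %s\n", It->c_str(), Aliasee[*It].c_str());
    Stack.clear();
  }
}
```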
- if (StringRef(GV->getSection()) == "llvm.metadata" || + if (GV->getSection() == "llvm.metadata" || GV->hasAvailableExternallyLinkage()) return true; @@ -1589,7 +1558,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { return true; } - return false; + report_fatal_error("unknown special variable"); } /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each @@ -1648,7 +1617,8 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, S.Priority = Priority->getLimitedValue(65535); S.Func = CS->getOperand(1); if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue()) - S.ComdatKey = dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts()); + S.ComdatKey = + dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts()); } // Emit the function pointers in the target-specific order @@ -1789,10 +1759,6 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { llvm_unreachable("Unknown constant value to lower!"); } - if (const MCExpr *RelocExpr - = getObjFileLowering().getExecutableRelativeSymbol(CE, *Mang, TM)) - return RelocExpr; - switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding @@ -1868,10 +1834,34 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); } + case Instruction::Sub: { + GlobalValue *LHSGV; + APInt LHSOffset; + if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHSGV, LHSOffset, + getDataLayout())) { + GlobalValue *RHSGV; + APInt RHSOffset; + if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset, + getDataLayout())) { + const MCExpr *RelocExpr = getObjFileLowering().lowerRelativeReference( + LHSGV, RHSGV, *Mang, TM); + if (!RelocExpr) + RelocExpr = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx), + MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx); + int64_t Addend = (LHSOffset - RHSOffset).getSExtValue(); + if (Addend != 0) + RelocExpr = MCBinaryExpr::createAdd( + RelocExpr, MCConstantExpr::create(Addend, Ctx), Ctx); + return RelocExpr; + } + } + } + // else fallthrough + // The MC library also has a right-shift operator, but it isn't consistently // signed or unsigned between different targets. case Instruction::Add: - case Instruction::Sub: case Instruction::Mul: case Instruction::SDiv: case Instruction::SRem: @@ -1964,7 +1954,7 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, uint64_t Bytes = DL.getTypeAllocSize(CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) - return AP.OutStreamer->EmitFill(Bytes, Value); + return AP.OutStreamer->emitFill(Bytes, Value); } // If this can be emitted with .ascii/.asciz, emit it as such. @@ -2003,7 +1993,7 @@ static void emitGlobalConstantArray(const DataLayout &DL, if (Value != -1) { uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); - AP.OutStreamer->EmitFill(Bytes, Value); + AP.OutStreamer->emitFill(Bytes, Value); } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { @@ -2582,7 +2572,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // If we are the operands of one of the branches, this is not a fall // through. Note that targets with delay slots will usually bundle // terminators with the delay slot instruction. 
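The new Instruction::Sub case above lowers a constant (gA + cA) - (gB + cB) to a relocatable symbol difference gA - gB plus a folded constant addend cA - cB, asking the target (lowerRelativeReference) first and falling back to a plain MCBinaryExpr subtraction. Toy arithmetic showing the decomposition, with integers standing in for symbol addresses:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  int64_t AddrA = 0x1000, OffA = 8;  // models gA + 8
  int64_t AddrB = 0x2000, OffB = 24; // models gB + 24
  int64_t SymDiff = AddrA - AddrB;   // relocatable part: gA - gB
  int64_t Addend = OffA - OffB;      // constant part folded into the expr
  std::printf("(gA - gB) + %lld = %lld\n", (long long)Addend,
              (long long)(SymDiff + Addend));
}
```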
- for (ConstMIBundleOperands OP(&MI); OP.isValid(); ++OP) { + for (ConstMIBundleOperands OP(MI); OP.isValid(); ++OP) { if (OP->isJTI()) return false; if (OP->isMBB() && OP->getMBB() == MBB) diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 504c5d2..60f40d0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -178,8 +178,7 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { /// EmitDwarfRegOp - Emit dwarf register operation. void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, const MachineLocation &MLoc) const { - DebugLocDwarfExpression Expr(*MF->getSubtarget().getRegisterInfo(), - getDwarfDebug()->getDwarfVersion(), Streamer); + DebugLocDwarfExpression Expr(getDwarfDebug()->getDwarfVersion(), Streamer); const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo(); int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false); if (Reg < 0) { @@ -193,7 +192,8 @@ void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, "nop (could not find a dwarf register number)"); // Attempt to find a valid super- or sub-register. - if (!Expr.AddMachineRegPiece(MLoc.getReg())) + if (!Expr.AddMachineRegPiece(*MF->getSubtarget().getRegisterInfo(), + MLoc.getReg())) Expr.EmitOp(dwarf::DW_OP_nop, "nop (could not find a dwarf register number)"); return; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index e59961f..638226e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -19,11 +19,14 @@ namespace llvm { +class AsmPrinter; class MachineBasicBlock; class MachineFunction; class MachineInstr; class MCSymbol; +typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm); + /// \brief Collects and handles AsmPrinter objects required to build debug /// or EH information. class AsmPrinterHandler { @@ -51,6 +54,10 @@ public: /// beginFunction at all. virtual void endFunction(const MachineFunction *MF) = 0; + virtual void beginFragment(const MachineBasicBlock *MBB, + ExceptionSymbolProvider ESP) {} + virtual void endFragment() {} + /// \brief Emit target-specific EH funclet machinery. 
virtual void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym = nullptr) {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 5633aa4..2ce6c18 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -23,10 +23,10 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h index df1997b..aaf6180 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -16,7 +16,6 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H #include "DIEHash.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/LEB128.h" diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp new file mode 100644 index 0000000..ebf80de --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -0,0 +1,2075 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Microsoft CodeView debug info. +// +//===----------------------------------------------------------------------===// + +#include "CodeViewDebug.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/DebugInfo/CodeView/ByteStream.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h" +#include "llvm/DebugInfo/CodeView/Line.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/TypeDumper.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/IR/Constants.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; +using namespace llvm::codeview; + +CodeViewDebug::CodeViewDebug(AsmPrinter *AP) + : DebugHandlerBase(AP), OS(*Asm->OutStreamer), CurFn(nullptr) { + // If module doesn't have named metadata anchors or COFF debug section + // is not available, skip any debug info related stuff. + if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || + !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) { + Asm = nullptr; + return; + } + + // Tell MMI that we have debug info. 
+ MMI->setDebugInfoAvailability(true); +} + +StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { + std::string &Filepath = FileToFilepathMap[File]; + if (!Filepath.empty()) + return Filepath; + + StringRef Dir = File->getDirectory(), Filename = File->getFilename(); + + // Clang emits directory and relative filename info into the IR, but CodeView + // operates on full paths. We could change Clang to emit full paths too, but + // that would increase the IR size and probably not needed for other users. + // For now, just concatenate and canonicalize the path here. + if (Filename.find(':') == 1) + Filepath = Filename; + else + Filepath = (Dir + "\\" + Filename).str(); + + // Canonicalize the path. We have to do it textually because we may no longer + // have access the file in the filesystem. + // First, replace all slashes with backslashes. + std::replace(Filepath.begin(), Filepath.end(), '/', '\\'); + + // Remove all "\.\" with "\". + size_t Cursor = 0; + while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos) + Filepath.erase(Cursor, 2); + + // Replace all "\XXX\..\" with "\". Don't try too hard though as the original + // path should be well-formatted, e.g. start with a drive letter, etc. + Cursor = 0; + while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) { + // Something's wrong if the path starts with "\..\", abort. + if (Cursor == 0) + break; + + size_t PrevSlash = Filepath.rfind('\\', Cursor - 1); + if (PrevSlash == std::string::npos) + // Something's wrong, abort. + break; + + Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash); + // The next ".." might be following the one we've just erased. + Cursor = PrevSlash; + } + + // Remove all duplicate backslashes. + Cursor = 0; + while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) + Filepath.erase(Cursor, 1); + + return Filepath; +} + +unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { + unsigned NextId = FileIdMap.size() + 1; + auto Insertion = FileIdMap.insert(std::make_pair(F, NextId)); + if (Insertion.second) { + // We have to compute the full filepath and emit a .cv_file directive. 
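getFullFilepath above canonicalizes the path textually because the file may no longer be reachable on the build machine's filesystem. The same four passes as a standalone function, mirroring the hunk (slashes to backslashes, collapse "\.\", resolve "\X\..\", deduplicate "\\"):

```cpp
#include <algorithm>
#include <cassert>
#include <string>

static std::string canonicalize(std::string P) {
  std::replace(P.begin(), P.end(), '/', '\\');
  size_t C = 0;
  while ((C = P.find("\\.\\", C)) != std::string::npos)
    P.erase(C, 2); // "\.\" becomes "\"
  C = 0;
  while ((C = P.find("\\..\\", C)) != std::string::npos) {
    if (C == 0)
      break; // path starts with "\..\": malformed, give up
    size_t Prev = P.rfind('\\', C - 1);
    if (Prev == std::string::npos)
      break;
    P.erase(Prev, C + 3 - Prev); // "\X\..\" becomes "\"
    C = Prev;                    // the next ".." may now be adjacent
  }
  C = 0;
  while ((C = P.find("\\\\", C)) != std::string::npos)
    P.erase(C, 1); // drop duplicate backslashes
  return P;
}

int main() {
  assert(canonicalize("C:/a/./b/../c//d.cpp") == "C:\\a\\c\\d.cpp");
}
```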
+ StringRef FullPath = getFullFilepath(F); + NextId = OS.EmitCVFileDirective(NextId, FullPath); + assert(NextId == FileIdMap.size() && ".cv_file directive failed"); + } + return Insertion.first->second; +} + +CodeViewDebug::InlineSite & +CodeViewDebug::getInlineSite(const DILocation *InlinedAt, + const DISubprogram *Inlinee) { + auto SiteInsertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()}); + InlineSite *Site = &SiteInsertion.first->second; + if (SiteInsertion.second) { + Site->SiteFuncId = NextFuncId++; + Site->Inlinee = Inlinee; + InlinedSubprograms.insert(Inlinee); + getFuncIdForSubprogram(Inlinee); + } + return *Site; +} + +static StringRef getPrettyScopeName(const DIScope *Scope) { + StringRef ScopeName = Scope->getName(); + if (!ScopeName.empty()) + return ScopeName; + + switch (Scope->getTag()) { + case dwarf::DW_TAG_enumeration_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + return "<unnamed-tag>"; + case dwarf::DW_TAG_namespace: + return "`anonymous namespace'"; + } + + return StringRef(); +} + +static const DISubprogram *getQualifiedNameComponents( + const DIScope *Scope, SmallVectorImpl<StringRef> &QualifiedNameComponents) { + const DISubprogram *ClosestSubprogram = nullptr; + while (Scope != nullptr) { + if (ClosestSubprogram == nullptr) + ClosestSubprogram = dyn_cast<DISubprogram>(Scope); + StringRef ScopeName = getPrettyScopeName(Scope); + if (!ScopeName.empty()) + QualifiedNameComponents.push_back(ScopeName); + Scope = Scope->getScope().resolve(); + } + return ClosestSubprogram; +} + +static std::string getQualifiedName(ArrayRef<StringRef> QualifiedNameComponents, + StringRef TypeName) { + std::string FullyQualifiedName; + for (StringRef QualifiedNameComponent : reverse(QualifiedNameComponents)) { + FullyQualifiedName.append(QualifiedNameComponent); + FullyQualifiedName.append("::"); + } + FullyQualifiedName.append(TypeName); + return FullyQualifiedName; +} + +static std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name) { + SmallVector<StringRef, 5> QualifiedNameComponents; + getQualifiedNameComponents(Scope, QualifiedNameComponents); + return getQualifiedName(QualifiedNameComponents, Name); +} + +struct CodeViewDebug::TypeLoweringScope { + TypeLoweringScope(CodeViewDebug &CVD) : CVD(CVD) { ++CVD.TypeEmissionLevel; } + ~TypeLoweringScope() { + // Don't decrement TypeEmissionLevel until after emitting deferred types, so + // inner TypeLoweringScopes don't attempt to emit deferred types. + if (CVD.TypeEmissionLevel == 1) + CVD.emitDeferredCompleteTypes(); + --CVD.TypeEmissionLevel; + } + CodeViewDebug &CVD; +}; + +static std::string getFullyQualifiedName(const DIScope *Ty) { + const DIScope *Scope = Ty->getScope().resolve(); + return getFullyQualifiedName(Scope, getPrettyScopeName(Ty)); +} + +TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) { + // No scope means global scope and that uses the zero index. + if (!Scope || isa<DIFile>(Scope)) + return TypeIndex(); + + assert(!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type"); + + // Check if we've already translated this scope. + auto I = TypeIndices.find({Scope, nullptr}); + if (I != TypeIndices.end()) + return I->second; + + // Build the fully qualified name of the scope. 
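TypeLoweringScope above is a depth-counting RAII guard: nested lowering may queue complete types, and the queue is flushed only when the outermost scope unwinds. Flushing happens before the counter is decremented, so scopes opened during the flush itself cannot re-trigger it. A generic sketch of the pattern:

```cpp
#include <cstdio>
#include <functional>
#include <vector>

struct Emitter {
  int Level = 0;
  std::vector<std::function<void()>> Deferred;

  struct Scope {
    Emitter &E;
    explicit Scope(Emitter &E) : E(E) { ++E.Level; }
    ~Scope() {
      // Flush while Level is still 1: Scopes opened by the callbacks see
      // Level >= 2 and therefore do not flush again.
      if (E.Level == 1)
        for (size_t I = 0; I < E.Deferred.size(); ++I)
          E.Deferred[I](); // index loop: callbacks may queue more work
      --E.Level;
    }
  };
};

int main() {
  Emitter E;
  {
    Emitter::Scope Outer(E);
    {
      Emitter::Scope Inner(E);
      E.Deferred.push_back([] { std::puts("deferred type emitted"); });
    } // Inner exits at Level 2: nothing flushed yet
  }   // Outer exits at Level 1: the deferred work runs here
}
```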
+ std::string ScopeName = getFullyQualifiedName(Scope); + TypeIndex TI = + TypeTable.writeStringId(StringIdRecord(TypeIndex(), ScopeName)); + return recordTypeIndexForDINode(Scope, TI); +} + +TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { + assert(SP); + + // Check if we've already translated this subprogram. + auto I = TypeIndices.find({SP, nullptr}); + if (I != TypeIndices.end()) + return I->second; + + // The display name includes function template arguments. Drop them to match + // MSVC. + StringRef DisplayName = SP->getDisplayName().split('<').first; + + const DIScope *Scope = SP->getScope().resolve(); + TypeIndex TI; + if (const auto *Class = dyn_cast_or_null<DICompositeType>(Scope)) { + // If the scope is a DICompositeType, then this must be a method. Member + // function types take some special handling, and require access to the + // subprogram. + TypeIndex ClassType = getTypeIndex(Class); + MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class), + DisplayName); + TI = TypeTable.writeMemberFuncId(MFuncId); + } else { + // Otherwise, this must be a free function. + TypeIndex ParentScope = getScopeIndex(Scope); + FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName); + TI = TypeTable.writeFuncId(FuncId); + } + + return recordTypeIndexForDINode(SP, TI); +} + +TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP, + const DICompositeType *Class) { + // Always use the method declaration as the key for the function type. The + // method declaration contains the this adjustment. + if (SP->getDeclaration()) + SP = SP->getDeclaration(); + assert(!SP->getDeclaration() && "should use declaration as key"); + + // Key the MemberFunctionRecord into the map as {SP, Class}. It won't collide + // with the MemberFuncIdRecord, which is keyed in as {SP, nullptr}. + auto I = TypeIndices.find({SP, Class}); + if (I != TypeIndices.end()) + return I->second; + + // Make sure complete type info for the class is emitted *after* the member + // function type, as the complete class type is likely to reference this + // member function type. + TypeLoweringScope S(*this); + TypeIndex TI = + lowerTypeMemberFunction(SP->getType(), Class, SP->getThisAdjustment()); + return recordTypeIndexForDINode(SP, TI, Class); +} + +TypeIndex CodeViewDebug::recordTypeIndexForDINode(const DINode *Node, + TypeIndex TI, + const DIType *ClassTy) { + auto InsertResult = TypeIndices.insert({{Node, ClassTy}, TI}); + (void)InsertResult; + assert(InsertResult.second && "DINode was already assigned a type index"); + return TI; +} + +unsigned CodeViewDebug::getPointerSizeInBytes() { + return MMI->getModule()->getDataLayout().getPointerSizeInBits() / 8; +} + +void CodeViewDebug::recordLocalVariable(LocalVariable &&Var, + const DILocation *InlinedAt) { + if (InlinedAt) { + // This variable was inlined. Associate it with the InlineSite. + const DISubprogram *Inlinee = Var.DIVar->getScope()->getSubprogram(); + InlineSite &Site = getInlineSite(InlinedAt, Inlinee); + Site.InlinedLocals.emplace_back(Var); + } else { + // This variable goes in the main ProcSym. 
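recordTypeIndexForDINode above memoizes every translation in a map keyed by a {node, class} pair: a subprogram's function id lives under {SP, nullptr} while its member-function type lives under {SP, Class}, so the two never collide. A minimal model with a hypothetical Node type:

```cpp
#include <cassert>
#include <map>
#include <utility>

struct Node {};
using TypeIndex = unsigned;

static std::map<std::pair<const Node *, const Node *>, TypeIndex> TypeIndices;

static TypeIndex record(const Node *N, TypeIndex TI,
                        const Node *ClassTy = nullptr) {
  bool Inserted = TypeIndices.insert({{N, ClassTy}, TI}).second;
  assert(Inserted && "node already has a type index");
  (void)Inserted;
  return TI;
}

int main() {
  Node SP, Class;
  record(&SP, 1);         // function id, keyed {SP, nullptr}
  record(&SP, 2, &Class); // member-function type, keyed {SP, &Class}
  assert(TypeIndices.size() == 2);
}
```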
+ CurFn->Locals.emplace_back(Var);
+ }
+}
+
+static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs,
+ const DILocation *Loc) {
+ auto B = Locs.begin(), E = Locs.end();
+ if (std::find(B, E, Loc) == E)
+ Locs.push_back(Loc);
+}
+
+void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
+ const MachineFunction *MF) {
+ // Skip this instruction if it has the same location as the previous one.
+ if (DL == CurFn->LastLoc)
+ return;
+
+ const DIScope *Scope = DL.get()->getScope();
+ if (!Scope)
+ return;
+
+ // Skip this line if it is longer than the maximum we can record.
+ LineInfo LI(DL.getLine(), DL.getLine(), /*IsStatement=*/true);
+ if (LI.getStartLine() != DL.getLine() || LI.isAlwaysStepInto() ||
+ LI.isNeverStepInto())
+ return;
+
+ ColumnInfo CI(DL.getCol(), /*EndColumn=*/0);
+ if (CI.getStartColumn() != DL.getCol())
+ return;
+
+ if (!CurFn->HaveLineInfo)
+ CurFn->HaveLineInfo = true;
+ unsigned FileId = 0;
+ if (CurFn->LastLoc.get() && CurFn->LastLoc->getFile() == DL->getFile())
+ FileId = CurFn->LastFileId;
+ else
+ FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile());
+ CurFn->LastLoc = DL;
+
+ unsigned FuncId = CurFn->FuncId;
+ if (const DILocation *SiteLoc = DL->getInlinedAt()) {
+ const DILocation *Loc = DL.get();
+
+ // If this location was actually inlined from somewhere else, give it the ID
+ // of the inline call site.
+ FuncId =
+ getInlineSite(SiteLoc, Loc->getScope()->getSubprogram()).SiteFuncId;
+
+ // Ensure we have links in the tree of inline call sites.
+ bool FirstLoc = true;
+ while ((SiteLoc = Loc->getInlinedAt())) {
+ InlineSite &Site =
+ getInlineSite(SiteLoc, Loc->getScope()->getSubprogram());
+ if (!FirstLoc)
+ addLocIfNotPresent(Site.ChildSites, Loc);
+ FirstLoc = false;
+ Loc = SiteLoc;
+ }
+ addLocIfNotPresent(CurFn->ChildSites, Loc);
+ }
+
+ OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(),
+ /*PrologueEnd=*/false,
+ /*IsStmt=*/false, DL->getFilename());
+}
+
+void CodeViewDebug::emitCodeViewMagicVersion() {
+ OS.EmitValueToAlignment(4);
+ OS.AddComment("Debug section magic");
+ OS.EmitIntValue(COFF::DEBUG_SECTION_MAGIC, 4);
+}
+
+void CodeViewDebug::endModule() {
+ if (!Asm || !MMI->hasDebugInfo())
+ return;
+
+ assert(Asm != nullptr);
+
+ // The COFF .debug$S section consists of several subsections, each starting
+ // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
+ // of the payload followed by the payload itself. The subsections are 4-byte
+ // aligned.
+
+ // Use the generic .debug$S section, and make a subsection for all the inlined
+ // subprograms.
+ switchToDebugSectionForSymbol(nullptr);
+ emitInlineeLinesSubsection();
+
+ // Emit per-function debug information.
+ for (auto &P : FnDebugInfo)
+ if (!P.first->isDeclarationForLinker())
+ emitDebugInfoForFunction(P.first, P.second);
+
+ // Emit global variable debug information.
+ setCurrentSubprogram(nullptr);
+ emitDebugInfoForGlobals();
+
+ // Emit retained types.
+ emitDebugInfoForRetainedTypes();
+
+ // Switch back to the generic .debug$S section after potentially processing
+ // comdat symbol sections.
+ switchToDebugSectionForSymbol(nullptr);
+
+ // Emit UDT records for any types used by global variables.
+ if (!GlobalUDTs.empty()) {
+ MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ emitDebugInfoForUDTs(GlobalUDTs);
+ endCVSubsection(SymbolsEnd);
+ }
+
+ // This subsection holds a file index to offset in the string table.
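The comment in endModule above describes the .debug$S layout: each subsection is a 4-byte kind, a 4-byte payload length, the payload itself, then padding to a 4-byte boundary. A rough byte-level sketch of that framing, assuming little-endian output; appendSubsection is a hypothetical helper, unrelated to the MCStreamer directives the patch actually uses:

#include <cstdint>
#include <vector>

// Append one CodeView-style subsection: 4-byte kind, 4-byte payload length,
// the payload, then zero padding up to a 4-byte boundary.
static void appendSubsection(std::vector<uint8_t> &Out, uint32_t Kind,
                             const std::vector<uint8_t> &Payload) {
  auto Emit32 = [&Out](uint32_t V) {
    for (int I = 0; I < 4; ++I)
      Out.push_back(uint8_t(V >> (8 * I))); // little-endian
  };
  Emit32(Kind);
  Emit32(uint32_t(Payload.size()));
  Out.insert(Out.end(), Payload.begin(), Payload.end());
  while (Out.size() % 4 != 0)
    Out.push_back(0);
}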
+ OS.AddComment("File index to string table offset subsection"); + OS.EmitCVFileChecksumsDirective(); + + // This subsection holds the string table. + OS.AddComment("String table"); + OS.EmitCVStringTableDirective(); + + // Emit type information last, so that any types we translate while emitting + // function info are included. + emitTypeInformation(); + + clear(); +} + +static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) { + // Microsoft's linker seems to have trouble with symbol names longer than + // 0xffd8 bytes. + S = S.substr(0, 0xffd8); + SmallString<32> NullTerminatedString(S); + NullTerminatedString.push_back('\0'); + OS.EmitBytes(NullTerminatedString); +} + +void CodeViewDebug::emitTypeInformation() { + // Do nothing if we have no debug info or if no non-trivial types were emitted + // to TypeTable during codegen. + NamedMDNode *CU_Nodes = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + if (!CU_Nodes) + return; + if (TypeTable.empty()) + return; + + // Start the .debug$T section with 0x4. + OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection()); + emitCodeViewMagicVersion(); + + SmallString<8> CommentPrefix; + if (OS.isVerboseAsm()) { + CommentPrefix += '\t'; + CommentPrefix += Asm->MAI->getCommentString(); + CommentPrefix += ' '; + } + + CVTypeDumper CVTD(nullptr, /*PrintRecordBytes=*/false); + TypeTable.ForEachRecord( + [&](TypeIndex Index, StringRef Record) { + if (OS.isVerboseAsm()) { + // Emit a block comment describing the type record for readability. + SmallString<512> CommentBlock; + raw_svector_ostream CommentOS(CommentBlock); + ScopedPrinter SP(CommentOS); + SP.setPrefix(CommentPrefix); + CVTD.setPrinter(&SP); + Error E = CVTD.dump({Record.bytes_begin(), Record.bytes_end()}); + if (E) { + logAllUnhandledErrors(std::move(E), errs(), "error: "); + llvm_unreachable("produced malformed type record"); + } + // emitRawComment will insert its own tab and comment string before + // the first line, so strip off our first one. It also prints its own + // newline. + OS.emitRawComment( + CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim()); + } else { +#ifndef NDEBUG + // Assert that the type data is valid even if we aren't dumping + // comments. The MSVC linker doesn't do much type record validation, + // so the first link of an invalid type record can succeed while + // subsequent links will fail with LNK1285. + ByteStream<> Stream({Record.bytes_begin(), Record.bytes_end()}); + CVTypeArray Types; + StreamReader Reader(Stream); + Error E = Reader.readArray(Types, Reader.getLength()); + if (!E) { + TypeVisitorCallbacks C; + E = CVTypeVisitor(C).visitTypeStream(Types); + } + if (E) { + logAllUnhandledErrors(std::move(E), errs(), "error: "); + llvm_unreachable("produced malformed type record"); + } +#endif + } + OS.EmitBinaryData(Record); + }); +} + +void CodeViewDebug::emitInlineeLinesSubsection() { + if (InlinedSubprograms.empty()) + return; + + OS.AddComment("Inlinee lines subsection"); + MCSymbol *InlineEnd = beginCVSubsection(ModuleSubstreamKind::InlineeLines); + + // We don't provide any extra file info. + // FIXME: Find out if debuggers use this info. 
+ OS.AddComment("Inlinee lines signature"); + OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4); + + for (const DISubprogram *SP : InlinedSubprograms) { + assert(TypeIndices.count({SP, nullptr})); + TypeIndex InlineeIdx = TypeIndices[{SP, nullptr}]; + + OS.AddBlankLine(); + unsigned FileId = maybeRecordFile(SP->getFile()); + OS.AddComment("Inlined function " + SP->getDisplayName() + " starts at " + + SP->getFilename() + Twine(':') + Twine(SP->getLine())); + OS.AddBlankLine(); + // The filechecksum table uses 8 byte entries for now, and file ids start at + // 1. + unsigned FileOffset = (FileId - 1) * 8; + OS.AddComment("Type index of inlined function"); + OS.EmitIntValue(InlineeIdx.getIndex(), 4); + OS.AddComment("Offset into filechecksum table"); + OS.EmitIntValue(FileOffset, 4); + OS.AddComment("Starting line number"); + OS.EmitIntValue(SP->getLine(), 4); + } + + endCVSubsection(InlineEnd); +} + +void CodeViewDebug::collectInlineSiteChildren( + SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI, + const InlineSite &Site) { + for (const DILocation *ChildSiteLoc : Site.ChildSites) { + auto I = FI.InlineSites.find(ChildSiteLoc); + const InlineSite &ChildSite = I->second; + Children.push_back(ChildSite.SiteFuncId); + collectInlineSiteChildren(Children, FI, ChildSite); + } +} + +void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, + const DILocation *InlinedAt, + const InlineSite &Site) { + MCSymbol *InlineBegin = MMI->getContext().createTempSymbol(), + *InlineEnd = MMI->getContext().createTempSymbol(); + + assert(TypeIndices.count({Site.Inlinee, nullptr})); + TypeIndex InlineeIdx = TypeIndices[{Site.Inlinee, nullptr}]; + + // SymbolRecord + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(InlineEnd, InlineBegin, 2); // RecordLength + OS.EmitLabel(InlineBegin); + OS.AddComment("Record kind: S_INLINESITE"); + OS.EmitIntValue(SymbolKind::S_INLINESITE, 2); // RecordKind + + OS.AddComment("PtrParent"); + OS.EmitIntValue(0, 4); + OS.AddComment("PtrEnd"); + OS.EmitIntValue(0, 4); + OS.AddComment("Inlinee type index"); + OS.EmitIntValue(InlineeIdx.getIndex(), 4); + + unsigned FileId = maybeRecordFile(Site.Inlinee->getFile()); + unsigned StartLineNum = Site.Inlinee->getLine(); + SmallVector<unsigned, 3> SecondaryFuncIds; + collectInlineSiteChildren(SecondaryFuncIds, FI, Site); + + OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum, + FI.Begin, FI.End, SecondaryFuncIds); + + OS.EmitLabel(InlineEnd); + + emitLocalVariableList(Site.InlinedLocals); + + // Recurse on child inlined call sites before closing the scope. + for (const DILocation *ChildSite : Site.ChildSites) { + auto I = FI.InlineSites.find(ChildSite); + assert(I != FI.InlineSites.end() && + "child site not in function inline site map"); + emitInlinedCallSite(FI, ChildSite, I->second); + } + + // Close the scope. + OS.AddComment("Record length"); + OS.EmitIntValue(2, 2); // RecordLength + OS.AddComment("Record kind: S_INLINESITE_END"); + OS.EmitIntValue(SymbolKind::S_INLINESITE_END, 2); // RecordKind +} + +void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) { + // If we have a symbol, it may be in a section that is COMDAT. If so, find the + // comdat key. A section may be comdat because of -ffunction-sections or + // because it is comdat in the IR. + MCSectionCOFF *GVSec = + GVSym ? dyn_cast<MCSectionCOFF>(&GVSym->getSection()) : nullptr; + const MCSymbol *KeySym = GVSec ? 
+ GVSec->getCOMDATSymbol() : nullptr;
+
+ MCSectionCOFF *DebugSec = cast<MCSectionCOFF>(
+ Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
+ DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym);
+
+ OS.SwitchSection(DebugSec);
+
+ // Emit the magic version number if this is the first time we've switched to
+ // this section.
+ if (ComdatDebugSections.insert(DebugSec).second)
+ emitCodeViewMagicVersion();
+}
+
+void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
+ FunctionInfo &FI) {
+ // For each function there is a separate subsection
+ // which holds the PC to file:line table.
+ const MCSymbol *Fn = Asm->getSymbol(GV);
+ assert(Fn);
+
+ // Switch to a comdat section, if appropriate.
+ switchToDebugSectionForSymbol(Fn);
+
+ std::string FuncName;
+ auto *SP = GV->getSubprogram();
+ assert(SP);
+ setCurrentSubprogram(SP);
+
+ // If we have a display name, build the fully qualified name by walking the
+ // chain of scopes.
+ if (!SP->getDisplayName().empty())
+ FuncName =
+ getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName());
+
+ // If our DISubprogram name is empty, use the mangled name.
+ if (FuncName.empty())
+ FuncName = GlobalValue::getRealLinkageName(GV->getName());
+
+ // Emit a symbol subsection, required by VS2012+ to find function boundaries.
+ OS.AddComment("Symbol subsection for " + Twine(FuncName));
+ MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ {
+ MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(),
+ *ProcRecordEnd = MMI->getContext().createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2);
+ OS.EmitLabel(ProcRecordBegin);
+
+ if (GV->hasLocalLinkage()) {
+ OS.AddComment("Record kind: S_LPROC32_ID");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2);
+ } else {
+ OS.AddComment("Record kind: S_GPROC32_ID");
+ OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2);
+ }
+
+ // These fields are filled in by tools like CVPACK which run after the fact.
+ OS.AddComment("PtrParent");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("PtrEnd");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("PtrNext");
+ OS.EmitIntValue(0, 4);
+ // This is the important bit that tells the debugger where the function
+ // code is located and what its size is:
+ OS.AddComment("Code size");
+ OS.emitAbsoluteSymbolDiff(FI.End, Fn, 4);
+ OS.AddComment("Offset after prologue");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Offset before epilogue");
+ OS.EmitIntValue(0, 4);
+ OS.AddComment("Function type index");
+ OS.EmitIntValue(getFuncIdForSubprogram(GV->getSubprogram()).getIndex(), 4);
+ OS.AddComment("Function section relative address");
+ OS.EmitCOFFSecRel32(Fn);
+ OS.AddComment("Function section index");
+ OS.EmitCOFFSectionIndex(Fn);
+ OS.AddComment("Flags");
+ OS.EmitIntValue(0, 1);
+ // Emit the function display name as a null-terminated string.
+ OS.AddComment("Function name");
+ // Truncate the name so we won't overflow the record length field.
+ emitNullTerminatedSymbolName(OS, FuncName);
+ OS.EmitLabel(ProcRecordEnd);
+
+ emitLocalVariableList(FI.Locals);
+
+ // Emit inlined call site information. Only emit functions inlined directly
+ // into the parent function. We'll emit the other sites recursively as part
+ // of their parent inline site.
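The inlined call sites emitted next form a tree: collectInlineSiteChildren gathers every transitive child's SiteFuncId, and emitInlinedCallSite recurses into children before closing each S_INLINESITE scope. A pared-down sketch of the same depth-first walk over a hypothetical site map; Site and collectChildren are illustrative stand-ins, not the patch's types:

#include <map>
#include <vector>

// Stand-in for an inline site: its function id plus keys of child sites.
struct Site {
  unsigned FuncId;
  std::vector<const void *> ChildKeys;
};

// Depth-first collection of every transitive child's FuncId, mirroring the
// recursion in collectInlineSiteChildren.
static void collectChildren(const std::map<const void *, Site> &Sites,
                            const Site &S, std::vector<unsigned> &Out) {
  for (const void *Key : S.ChildKeys) {
    const Site &Child = Sites.at(Key);
    Out.push_back(Child.FuncId);
    collectChildren(Sites, Child, Out);
  }
}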
+ for (const DILocation *InlinedAt : FI.ChildSites) { + auto I = FI.InlineSites.find(InlinedAt); + assert(I != FI.InlineSites.end() && + "child site not in function inline site map"); + emitInlinedCallSite(FI, InlinedAt, I->second); + } + + if (SP != nullptr) + emitDebugInfoForUDTs(LocalUDTs); + + // We're done with this function. + OS.AddComment("Record length"); + OS.EmitIntValue(0x0002, 2); + OS.AddComment("Record kind: S_PROC_ID_END"); + OS.EmitIntValue(unsigned(SymbolKind::S_PROC_ID_END), 2); + } + endCVSubsection(SymbolsEnd); + + // We have an assembler directive that takes care of the whole line table. + OS.EmitCVLinetableDirective(FI.FuncId, Fn, FI.End); +} + +CodeViewDebug::LocalVarDefRange +CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) { + LocalVarDefRange DR; + DR.InMemory = -1; + DR.DataOffset = Offset; + assert(DR.DataOffset == Offset && "truncation"); + DR.StructOffset = 0; + DR.CVRegister = CVRegister; + return DR; +} + +CodeViewDebug::LocalVarDefRange +CodeViewDebug::createDefRangeReg(uint16_t CVRegister) { + LocalVarDefRange DR; + DR.InMemory = 0; + DR.DataOffset = 0; + DR.StructOffset = 0; + DR.CVRegister = CVRegister; + return DR; +} + +void CodeViewDebug::collectVariableInfoFromMMITable( + DenseSet<InlinedVariable> &Processed) { + const TargetSubtargetInfo &TSI = Asm->MF->getSubtarget(); + const TargetFrameLowering *TFI = TSI.getFrameLowering(); + const TargetRegisterInfo *TRI = TSI.getRegisterInfo(); + + for (const MachineModuleInfo::VariableDbgInfo &VI : + MMI->getVariableDbgInfo()) { + if (!VI.Var) + continue; + assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && + "Expected inlined-at fields to agree"); + + Processed.insert(InlinedVariable(VI.Var, VI.Loc->getInlinedAt())); + LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); + + // If variable scope is not found then skip this variable. + if (!Scope) + continue; + + // Get the frame register used and the offset. + unsigned FrameReg = 0; + int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg); + uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg); + + // Calculate the label ranges. + LocalVarDefRange DefRange = createDefRangeMem(CVReg, FrameOffset); + for (const InsnRange &Range : Scope->getRanges()) { + const MCSymbol *Begin = getLabelBeforeInsn(Range.first); + const MCSymbol *End = getLabelAfterInsn(Range.second); + End = End ? End : Asm->getFunctionEnd(); + DefRange.Ranges.emplace_back(Begin, End); + } + + LocalVariable Var; + Var.DIVar = VI.Var; + Var.DefRanges.emplace_back(std::move(DefRange)); + recordLocalVariable(std::move(Var), VI.Loc->getInlinedAt()); + } +} + +void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { + DenseSet<InlinedVariable> Processed; + // Grab the variable info that was squirreled away in the MMI side-table. + collectVariableInfoFromMMITable(Processed); + + const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo(); + + for (const auto &I : DbgValues) { + InlinedVariable IV = I.first; + if (Processed.count(IV)) + continue; + const DILocalVariable *DIVar = IV.first; + const DILocation *InlinedAt = IV.second; + + // Instruction ranges, specifying where IV is accessible. + const auto &Ranges = I.second; + + LexicalScope *Scope = nullptr; + if (InlinedAt) + Scope = LScopes.findInlinedScope(DIVar->getScope(), InlinedAt); + else + Scope = LScopes.findLexicalScope(DIVar->getScope()); + // If variable scope is not found then skip this variable. 
+ if (!Scope)
+ continue;
+
+ LocalVariable Var;
+ Var.DIVar = DIVar;
+
+ // Calculate the definition ranges.
+ for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+ const InsnRange &Range = *I;
+ const MachineInstr *DVInst = Range.first;
+ assert(DVInst->isDebugValue() && "Invalid History entry");
+ const DIExpression *DIExpr = DVInst->getDebugExpression();
+
+ // Bail if there is a complex DWARF expression for now.
+ if (DIExpr && DIExpr->getNumElements() > 0)
+ continue;
+
+ // Bail if operand 0 is not a valid register. This means the variable is a
+ // simple constant, or is described by a complex expression.
+ // FIXME: Find a way to represent constant variables, since they are
+ // relatively common.
+ unsigned Reg =
+ DVInst->getOperand(0).isReg() ? DVInst->getOperand(0).getReg() : 0;
+ if (Reg == 0)
+ continue;
+
+ // Handle the two cases we can handle: indirect in memory and in register.
+ bool IsIndirect = DVInst->getOperand(1).isImm();
+ unsigned CVReg = TRI->getCodeViewRegNum(DVInst->getOperand(0).getReg());
+ {
+ LocalVarDefRange DefRange;
+ if (IsIndirect) {
+ int64_t Offset = DVInst->getOperand(1).getImm();
+ DefRange = createDefRangeMem(CVReg, Offset);
+ } else {
+ DefRange = createDefRangeReg(CVReg);
+ }
+ if (Var.DefRanges.empty() ||
+ Var.DefRanges.back().isDifferentLocation(DefRange)) {
+ Var.DefRanges.emplace_back(std::move(DefRange));
+ }
+ }
+
+ // Compute the label range.
+ const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
+ const MCSymbol *End = getLabelAfterInsn(Range.second);
+ if (!End) {
+ if (std::next(I) != E)
+ End = getLabelBeforeInsn(std::next(I)->first);
+ else
+ End = Asm->getFunctionEnd();
+ }
+
+ // If the last range end is our begin, just extend the last range.
+ // Otherwise make a new range.
+ SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &Ranges =
+ Var.DefRanges.back().Ranges;
+ if (!Ranges.empty() && Ranges.back().second == Begin)
+ Ranges.back().second = End;
+ else
+ Ranges.emplace_back(Begin, End);
+
+ // FIXME: Do more range combining.
+ }
+
+ recordLocalVariable(std::move(Var), InlinedAt);
+ }
+}
+
+void CodeViewDebug::beginFunction(const MachineFunction *MF) {
+ assert(!CurFn && "Can't process two functions at once!");
+
+ if (!Asm || !MMI->hasDebugInfo() || !MF->getFunction()->getSubprogram())
+ return;
+
+ DebugHandlerBase::beginFunction(MF);
+
+ const Function *GV = MF->getFunction();
+ assert(FnDebugInfo.count(GV) == false);
+ CurFn = &FnDebugInfo[GV];
+ CurFn->FuncId = NextFuncId++;
+ CurFn->Begin = Asm->getFunctionBegin();
+
+ // Find the end of the function prolog. First known non-DBG_VALUE and
+ // non-frame setup location marks the beginning of the function body.
+ // FIXME: is there a simpler way to do this? Can we just search
+ // for the first instruction of the function, not the last of the prolog?
+ DebugLoc PrologEndLoc;
+ bool EmptyPrologue = true;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
+ MI.getDebugLoc()) {
+ PrologEndLoc = MI.getDebugLoc();
+ break;
+ } else if (!MI.isDebugValue()) {
+ EmptyPrologue = false;
+ }
+ }
+ }
+
+ // Record beginning of function if we have a non-empty prologue.
+ if (PrologEndLoc && !EmptyPrologue) {
+ DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
+ maybeRecordLocation(FnStartDL, MF);
+ }
+}
+
+void CodeViewDebug::addToUDTs(const DIType *Ty, TypeIndex TI) {
+ // Don't record empty UDTs.
+ if (Ty->getName().empty())
+ return;
+
+ SmallVector<StringRef, 5> QualifiedNameComponents;
+ const DISubprogram *ClosestSubprogram = getQualifiedNameComponents(
+ Ty->getScope().resolve(), QualifiedNameComponents);
+
+ std::string FullyQualifiedName =
+ getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty));
+
+ if (ClosestSubprogram == nullptr)
+ GlobalUDTs.emplace_back(std::move(FullyQualifiedName), TI);
+ else if (ClosestSubprogram == CurrentSubprogram)
+ LocalUDTs.emplace_back(std::move(FullyQualifiedName), TI);
+
+ // TODO: What if the ClosestSubprogram is neither null nor the current
+ // subprogram? Currently, the UDT just gets dropped on the floor.
+ //
+ // The current behavior is not desirable. To get maximal fidelity, we would
+ // need to perform all type translation before beginning emission of .debug$S
+ // and then make LocalUDTs a member of FunctionInfo.
+}
+
+TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
+ // Generic dispatch for lowering an unknown type.
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_array_type:
+ return lowerTypeArray(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_typedef:
+ return lowerTypeAlias(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_base_type:
+ return lowerTypeBasic(cast<DIBasicType>(Ty));
+ case dwarf::DW_TAG_pointer_type:
+ case dwarf::DW_TAG_reference_type:
+ case dwarf::DW_TAG_rvalue_reference_type:
+ return lowerTypePointer(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_ptr_to_member_type:
+ return lowerTypeMemberPointer(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_const_type:
+ case dwarf::DW_TAG_volatile_type:
+ return lowerTypeModifier(cast<DIDerivedType>(Ty));
+ case dwarf::DW_TAG_subroutine_type:
+ if (ClassTy) {
+ // The member function type of a member function pointer has no
+ // ThisAdjustment.
+ return lowerTypeMemberFunction(cast<DISubroutineType>(Ty), ClassTy,
+ /*ThisAdjustment=*/0);
+ }
+ return lowerTypeFunction(cast<DISubroutineType>(Ty));
+ case dwarf::DW_TAG_enumeration_type:
+ return lowerTypeEnum(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ return lowerTypeClass(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_union_type:
+ return lowerTypeUnion(cast<DICompositeType>(Ty));
+ default:
+ // Use the null type index.
+ return TypeIndex();
+ }
+}
+
+TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
+ DITypeRef UnderlyingTypeRef = Ty->getBaseType();
+ TypeIndex UnderlyingTypeIndex = getTypeIndex(UnderlyingTypeRef);
+ StringRef TypeName = Ty->getName();
+
+ addToUDTs(Ty, UnderlyingTypeIndex);
+
+ if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::Int32Long) &&
+ TypeName == "HRESULT")
+ return TypeIndex(SimpleTypeKind::HResult);
+ if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::UInt16Short) &&
+ TypeName == "wchar_t")
+ return TypeIndex(SimpleTypeKind::WideCharacter);
+
+ return UnderlyingTypeIndex;
+}
+
+TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
+ DITypeRef ElementTypeRef = Ty->getBaseType();
+ TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);
+ // IndexType is size_t, which depends on the bitness of the target.
+ TypeIndex IndexType = Asm->MAI->getPointerSize() == 8
+ ? TypeIndex(SimpleTypeKind::UInt64Quad)
+ : TypeIndex(SimpleTypeKind::UInt32Long);
+
+ uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
+
+ bool UndefinedSubrange = false;
+
+ // FIXME:
+ // There is a bug in the front-end where an array of a structure, which was
+ // declared as an incomplete structure first, ends up not getting a size
+ // assigned to it. (PR28303)
+ // Example:
+ // struct A(*p)[3];
+ // struct A { int f; } a[3];
+ //
+ // This needs to be fixed in the front-end, but in the meantime we don't want
+ // to trigger an assertion because of this.
+ if (Ty->getSizeInBits() == 0) {
+ UndefinedSubrange = true;
+ }
+
+ // Add subranges to array type.
+ DINodeArray Elements = Ty->getElements();
+ for (int i = Elements.size() - 1; i >= 0; --i) {
+ const DINode *Element = Elements[i];
+ assert(Element->getTag() == dwarf::DW_TAG_subrange_type);
+
+ const DISubrange *Subrange = cast<DISubrange>(Element);
+ assert(Subrange->getLowerBound() == 0 &&
+ "codeview doesn't support subranges with lower bounds");
+ int64_t Count = Subrange->getCount();
+
+ // Variable Length Array (VLA) has Count equal to '-1'.
+ // Replace with Count '1', assume it is the minimum VLA length.
+ // FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU.
+ if (Count == -1) {
+ Count = 1;
+ UndefinedSubrange = true;
+ }
+
+ StringRef Name = (i == 0) ? Ty->getName() : "";
+ // Update the element size and element type index for subsequent subranges.
+ ElementSize *= Count;
+ ElementTypeIndex = TypeTable.writeArray(
+ ArrayRecord(ElementTypeIndex, IndexType, ElementSize, Name));
+ }
+
+ (void)UndefinedSubrange;
+ assert(UndefinedSubrange || ElementSize == (Ty->getSizeInBits() / 8));
+
+ return ElementTypeIndex;
+}
+
+TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
+ TypeIndex Index;
+ dwarf::TypeKind Kind;
+ uint32_t ByteSize;
+
+ Kind = static_cast<dwarf::TypeKind>(Ty->getEncoding());
+ ByteSize = Ty->getSizeInBits() / 8;
+
+ SimpleTypeKind STK = SimpleTypeKind::None;
+ switch (Kind) {
+ case dwarf::DW_ATE_address:
+ // FIXME: Translate
+ break;
+ case dwarf::DW_ATE_boolean:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::Boolean8; break;
+ case 2: STK = SimpleTypeKind::Boolean16; break;
+ case 4: STK = SimpleTypeKind::Boolean32; break;
+ case 8: STK = SimpleTypeKind::Boolean64; break;
+ case 16: STK = SimpleTypeKind::Boolean128; break;
+ }
+ break;
+ case dwarf::DW_ATE_complex_float:
+ switch (ByteSize) {
+ case 2: STK = SimpleTypeKind::Complex16; break;
+ case 4: STK = SimpleTypeKind::Complex32; break;
+ case 8: STK = SimpleTypeKind::Complex64; break;
+ case 10: STK = SimpleTypeKind::Complex80; break;
+ case 16: STK = SimpleTypeKind::Complex128; break;
+ }
+ break;
+ case dwarf::DW_ATE_float:
+ switch (ByteSize) {
+ case 2: STK = SimpleTypeKind::Float16; break;
+ case 4: STK = SimpleTypeKind::Float32; break;
+ case 6: STK = SimpleTypeKind::Float48; break;
+ case 8: STK = SimpleTypeKind::Float64; break;
+ case 10: STK = SimpleTypeKind::Float80; break;
+ case 16: STK = SimpleTypeKind::Float128; break;
+ }
+ break;
+ case dwarf::DW_ATE_signed:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::SByte; break;
+ case 2: STK = SimpleTypeKind::Int16Short; break;
+ case 4: STK = SimpleTypeKind::Int32; break;
+ case 8: STK = SimpleTypeKind::Int64Quad; break;
+ case 16: STK = SimpleTypeKind::Int128Oct; break;
+ }
+ break;
+ case dwarf::DW_ATE_unsigned:
+ switch (ByteSize) {
+ case 1: STK = SimpleTypeKind::Byte; break;
+ case 2: STK = SimpleTypeKind::UInt16Short; break;
+ case 4: STK = SimpleTypeKind::UInt32; break;
+ case 8: STK = SimpleTypeKind::UInt64Quad; break;
+ case 16: STK = SimpleTypeKind::UInt128Oct; break;
+ }
+ break;
+ case dwarf::DW_ATE_UTF:
+ switch (ByteSize) {
+ case 2: STK = SimpleTypeKind::Character16; break;
+ case 4: STK = SimpleTypeKind::Character32; break;
+ }
+ break;
+ case dwarf::DW_ATE_signed_char:
+ if (ByteSize == 1)
+ STK = SimpleTypeKind::SignedCharacter;
+ break;
+ case dwarf::DW_ATE_unsigned_char:
+ if (ByteSize == 1)
+ STK = SimpleTypeKind::UnsignedCharacter;
+ break;
+ default:
+ break;
+ }
+
+ // Apply some fixups based on the source-level type name.
+ if (STK == SimpleTypeKind::Int32 && Ty->getName() == "long int")
+ STK = SimpleTypeKind::Int32Long;
+ if (STK == SimpleTypeKind::UInt32 && Ty->getName() == "long unsigned int")
+ STK = SimpleTypeKind::UInt32Long;
+ if (STK == SimpleTypeKind::UInt16Short &&
+ (Ty->getName() == "wchar_t" || Ty->getName() == "__wchar_t"))
+ STK = SimpleTypeKind::WideCharacter;
+ if ((STK == SimpleTypeKind::SignedCharacter ||
+ STK == SimpleTypeKind::UnsignedCharacter) &&
+ Ty->getName() == "char")
+ STK = SimpleTypeKind::NarrowCharacter;
+
+ return TypeIndex(STK);
+}
+
+TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
+ TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType());
+
+ // While processing the type being pointed to, it is possible we already
+ // created this pointer type. If so, we check here and return the existing
+ // pointer type.
+ auto I = TypeIndices.find({Ty, nullptr});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ // Pointers to simple types can use SimpleTypeMode, rather than having a
+ // dedicated pointer type record.
+ if (PointeeTI.isSimple() &&
+ PointeeTI.getSimpleMode() == SimpleTypeMode::Direct &&
+ Ty->getTag() == dwarf::DW_TAG_pointer_type) {
+ SimpleTypeMode Mode = Ty->getSizeInBits() == 64
+ ? SimpleTypeMode::NearPointer64
+ : SimpleTypeMode::NearPointer32;
+ return TypeIndex(PointeeTI.getSimpleKind(), Mode);
+ }
+
+ PointerKind PK =
+ Ty->getSizeInBits() == 64 ? PointerKind::Near64 : PointerKind::Near32;
+ PointerMode PM = PointerMode::Pointer;
+ switch (Ty->getTag()) {
+ default: llvm_unreachable("not a pointer tag type");
+ case dwarf::DW_TAG_pointer_type:
+ PM = PointerMode::Pointer;
+ break;
+ case dwarf::DW_TAG_reference_type:
+ PM = PointerMode::LValueReference;
+ break;
+ case dwarf::DW_TAG_rvalue_reference_type:
+ PM = PointerMode::RValueReference;
+ break;
+ }
+ // FIXME: MSVC folds qualifiers into PointerOptions in the context of a method
+ // 'this' pointer, but not in normal contexts. Figure out what we're supposed
+ // to do.
+ PointerOptions PO = PointerOptions::None;
+ PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
+ return TypeTable.writePointer(PR);
+}
+
+static PointerToMemberRepresentation
+translatePtrToMemberRep(unsigned SizeInBytes, bool IsPMF, unsigned Flags) {
+ // SizeInBytes being zero generally implies that the member pointer type was
+ // incomplete, which can happen if it is part of a function prototype. In this
+ // case, use the unknown model instead of the general model.
+ if (IsPMF) {
+ switch (Flags & DINode::FlagPtrToMemberRep) {
+ case 0:
+ return SizeInBytes == 0 ? PointerToMemberRepresentation::Unknown
+ : PointerToMemberRepresentation::GeneralFunction;
+ case DINode::FlagSingleInheritance:
+ return PointerToMemberRepresentation::SingleInheritanceFunction;
+ case DINode::FlagMultipleInheritance:
+ return PointerToMemberRepresentation::MultipleInheritanceFunction;
+ case DINode::FlagVirtualInheritance:
+ return PointerToMemberRepresentation::VirtualInheritanceFunction;
+ }
+ } else {
+ switch (Flags & DINode::FlagPtrToMemberRep) {
+ case 0:
+ return SizeInBytes == 0 ? PointerToMemberRepresentation::Unknown
+ : PointerToMemberRepresentation::GeneralData;
+ case DINode::FlagSingleInheritance:
+ return PointerToMemberRepresentation::SingleInheritanceData;
+ case DINode::FlagMultipleInheritance:
+ return PointerToMemberRepresentation::MultipleInheritanceData;
+ case DINode::FlagVirtualInheritance:
+ return PointerToMemberRepresentation::VirtualInheritanceData;
+ }
+ }
+ llvm_unreachable("invalid ptr to member representation");
+}
+
+TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
+ assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type);
+ TypeIndex ClassTI = getTypeIndex(Ty->getClassType());
+ TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType());
+ PointerKind PK = Asm->MAI->getPointerSize() == 8 ? PointerKind::Near64
+ : PointerKind::Near32;
+ bool IsPMF = isa<DISubroutineType>(Ty->getBaseType());
+ PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction
+ : PointerMode::PointerToDataMember;
+ PointerOptions PO = PointerOptions::None; // FIXME
+ assert(Ty->getSizeInBits() / 8 <= 0xff && "pointer size too big");
+ uint8_t SizeInBytes = Ty->getSizeInBits() / 8;
+ MemberPointerInfo MPI(
+ ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags()));
+ PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI);
+ return TypeTable.writePointer(PR);
+}
+
+/// Given a DWARF calling convention, get the CodeView equivalent. If we don't
+/// have a translation, use the NearC convention.
+static CallingConvention dwarfCCToCodeView(unsigned DwarfCC) {
+ switch (DwarfCC) {
+ case dwarf::DW_CC_normal: return CallingConvention::NearC;
+ case dwarf::DW_CC_BORLAND_msfastcall: return CallingConvention::NearFast;
+ case dwarf::DW_CC_BORLAND_thiscall: return CallingConvention::ThisCall;
+ case dwarf::DW_CC_BORLAND_stdcall: return CallingConvention::NearStdCall;
+ case dwarf::DW_CC_BORLAND_pascal: return CallingConvention::NearPascal;
+ case dwarf::DW_CC_LLVM_vectorcall: return CallingConvention::NearVector;
+ }
+ return CallingConvention::NearC;
+}
+
+TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
+ ModifierOptions Mods = ModifierOptions::None;
+ bool IsModifier = true;
+ const DIType *BaseTy = Ty;
+ while (IsModifier && BaseTy) {
+ // FIXME: Need to add DWARF tag for __unaligned.
+ switch (BaseTy->getTag()) {
+ case dwarf::DW_TAG_const_type:
+ Mods |= ModifierOptions::Const;
+ break;
+ case dwarf::DW_TAG_volatile_type:
+ Mods |= ModifierOptions::Volatile;
+ break;
+ default:
+ IsModifier = false;
+ break;
+ }
+ if (IsModifier)
+ BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve();
+ }
+ TypeIndex ModifiedTI = getTypeIndex(BaseTy);
+
+ // While processing the base type, it is possible we already created this
+ // modifier type. If so, we check here and return the existing
+ // modifier type.
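lowerTypeModifier above walks a chain of const and volatile wrapper types, ORs their flags together, and emits a single ModifierRecord over the innermost base type. A minimal sketch of that folding with plain structs; Node and foldModifiers are hypothetical stand-ins for the DI types:

// Tag values: 0 = base type, 1 = const wrapper, 2 = volatile wrapper.
struct Node {
  int Tag;
  const Node *Base;
};

enum Mods { ModNone = 0, ModConst = 1, ModVolatile = 2 };

// Walk the wrapper chain, OR-ing modifier flags, and report the innermost
// base type, mirroring the loop in lowerTypeModifier.
static unsigned foldModifiers(const Node *N, const Node *&BaseOut) {
  unsigned M = ModNone;
  while (N && N->Tag != 0) {
    M |= (N->Tag == 1) ? ModConst : ModVolatile;
    N = N->Base;
  }
  BaseOut = N;
  return M;
}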
+ auto I = TypeIndices.find({Ty, nullptr}); + if (I != TypeIndices.end()) + return I->second; + + ModifierRecord MR(ModifiedTI, Mods); + return TypeTable.writeModifier(MR); +} + +TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { + SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices; + for (DITypeRef ArgTypeRef : Ty->getTypeArray()) + ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef)); + + TypeIndex ReturnTypeIndex = TypeIndex::Void(); + ArrayRef<TypeIndex> ArgTypeIndices = None; + if (!ReturnAndArgTypeIndices.empty()) { + auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices); + ReturnTypeIndex = ReturnAndArgTypesRef.front(); + ArgTypeIndices = ReturnAndArgTypesRef.drop_front(); + } + + ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); + TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec); + + CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); + + ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None, + ArgTypeIndices.size(), ArgListIndex); + return TypeTable.writeProcedure(Procedure); +} + +TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, + const DIType *ClassTy, + int ThisAdjustment) { + // Lower the containing class type. + TypeIndex ClassType = getTypeIndex(ClassTy); + + SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices; + for (DITypeRef ArgTypeRef : Ty->getTypeArray()) + ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef)); + + TypeIndex ReturnTypeIndex = TypeIndex::Void(); + ArrayRef<TypeIndex> ArgTypeIndices = None; + if (!ReturnAndArgTypeIndices.empty()) { + auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices); + ReturnTypeIndex = ReturnAndArgTypesRef.front(); + ArgTypeIndices = ReturnAndArgTypesRef.drop_front(); + } + TypeIndex ThisTypeIndex = TypeIndex::Void(); + if (!ArgTypeIndices.empty()) { + ThisTypeIndex = ArgTypeIndices.front(); + ArgTypeIndices = ArgTypeIndices.drop_front(); + } + + ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); + TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec); + + CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); + + // TODO: Need to use the correct values for: + // FunctionOptions + // ThisPointerAdjustment. + TypeIndex TI = TypeTable.writeMemberFunction(MemberFunctionRecord( + ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FunctionOptions::None, + ArgTypeIndices.size(), ArgListIndex, ThisAdjustment)); + + return TI; +} + +static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) { + switch (Flags & DINode::FlagAccessibility) { + case DINode::FlagPrivate: return MemberAccess::Private; + case DINode::FlagPublic: return MemberAccess::Public; + case DINode::FlagProtected: return MemberAccess::Protected; + case 0: + // If there was no explicit access control, provide the default for the tag. + return RecordTag == dwarf::DW_TAG_class_type ? MemberAccess::Private + : MemberAccess::Public; + } + llvm_unreachable("access flags are exclusive"); +} + +static MethodOptions translateMethodOptionFlags(const DISubprogram *SP) { + if (SP->isArtificial()) + return MethodOptions::CompilerGenerated; + + // FIXME: Handle other MethodOptions. + + return MethodOptions::None; +} + +static MethodKind translateMethodKindFlags(const DISubprogram *SP, + bool Introduced) { + switch (SP->getVirtuality()) { + case dwarf::DW_VIRTUALITY_none: + break; + case dwarf::DW_VIRTUALITY_virtual: + return Introduced ? 
+ MethodKind::IntroducingVirtual : MethodKind::Virtual;
+ case dwarf::DW_VIRTUALITY_pure_virtual:
+ return Introduced ? MethodKind::PureIntroducingVirtual
+ : MethodKind::PureVirtual;
+ default:
+ llvm_unreachable("unhandled virtuality case");
+ }
+
+ // FIXME: Get Clang to mark DISubprogram as static and do something with it.
+
+ return MethodKind::Vanilla;
+}
+
+static TypeRecordKind getRecordKind(const DICompositeType *Ty) {
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_class_type: return TypeRecordKind::Class;
+ case dwarf::DW_TAG_structure_type: return TypeRecordKind::Struct;
+ }
+ llvm_unreachable("unexpected tag");
+}
+
+/// Return ClassOptions that should be present on both the forward declaration
+/// and the definition of a tag type.
+static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
+ ClassOptions CO = ClassOptions::None;
+
+ // MSVC always sets this flag, even for local types. Clang doesn't always
+ // appear to give every type a linkage name, which may be problematic for us.
+ // FIXME: Investigate the consequences of not following them here.
+ if (!Ty->getIdentifier().empty())
+ CO |= ClassOptions::HasUniqueName;
+
+ // Put the Nested flag on a type if it appears immediately inside a tag type.
+ // Do not walk the scope chain. Do not attempt to compute ContainsNestedClass
+ // here. That flag is only set on definitions, and not forward declarations.
+ const DIScope *ImmediateScope = Ty->getScope().resolve();
+ if (ImmediateScope && isa<DICompositeType>(ImmediateScope))
+ CO |= ClassOptions::Nested;
+
+ // Put the Scoped flag on function-local types.
+ for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
+ Scope = Scope->getScope().resolve()) {
+ if (isa<DISubprogram>(Scope)) {
+ CO |= ClassOptions::Scoped;
+ break;
+ }
+ }
+
+ return CO;
+}
+
+TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
+ ClassOptions CO = getCommonClassOptions(Ty);
+ TypeIndex FTI;
+ unsigned EnumeratorCount = 0;
+
+ if (Ty->isForwardDecl()) {
+ CO |= ClassOptions::ForwardReference;
+ } else {
+ FieldListRecordBuilder Fields;
+ for (const DINode *Element : Ty->getElements()) {
+ // We assume that the frontend provides all members in source declaration
+ // order, which is what MSVC does.
+ if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) {
+ Fields.writeEnumerator(EnumeratorRecord(
+ MemberAccess::Public, APSInt::getUnsigned(Enumerator->getValue()),
+ Enumerator->getName()));
+ EnumeratorCount++;
+ }
+ }
+ FTI = TypeTable.writeFieldList(Fields);
+ }
+
+ std::string FullName = getFullyQualifiedName(Ty);
+
+ return TypeTable.writeEnum(EnumRecord(EnumeratorCount, CO, FTI, FullName,
+ Ty->getIdentifier(),
+ getTypeIndex(Ty->getBaseType())));
+}
+
+//===----------------------------------------------------------------------===//
+// ClassInfo
+//===----------------------------------------------------------------------===//
+
+struct llvm::ClassInfo {
+ struct MemberInfo {
+ const DIDerivedType *MemberTypeNode;
+ uint64_t BaseOffset;
+ };
+ // [MemberInfo]
+ typedef std::vector<MemberInfo> MemberList;
+
+ typedef TinyPtrVector<const DISubprogram *> MethodsList;
+ // MethodName -> MethodsList
+ typedef MapVector<MDString *, MethodsList> MethodsMap;
+
+ /// Base classes.
+ std::vector<const DIDerivedType *> Inheritance;
+
+ /// Direct members.
+ MemberList Members;
+ // Direct overloaded methods gathered by name.
+ MethodsMap Methods; + + std::vector<const DICompositeType *> NestedClasses; +}; + +void CodeViewDebug::clear() { + assert(CurFn == nullptr); + FileIdMap.clear(); + FnDebugInfo.clear(); + FileToFilepathMap.clear(); + LocalUDTs.clear(); + GlobalUDTs.clear(); + TypeIndices.clear(); + CompleteTypeIndices.clear(); +} + +void CodeViewDebug::collectMemberInfo(ClassInfo &Info, + const DIDerivedType *DDTy) { + if (!DDTy->getName().empty()) { + Info.Members.push_back({DDTy, 0}); + return; + } + // An unnamed member must represent a nested struct or union. Add all the + // indirect fields to the current record. + assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!"); + uint64_t Offset = DDTy->getOffsetInBits(); + const DIType *Ty = DDTy->getBaseType().resolve(); + const DICompositeType *DCTy = cast<DICompositeType>(Ty); + ClassInfo NestedInfo = collectClassInfo(DCTy); + for (const ClassInfo::MemberInfo &IndirectField : NestedInfo.Members) + Info.Members.push_back( + {IndirectField.MemberTypeNode, IndirectField.BaseOffset + Offset}); +} + +ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) { + ClassInfo Info; + // Add elements to structure type. + DINodeArray Elements = Ty->getElements(); + for (auto *Element : Elements) { + // We assume that the frontend provides all members in source declaration + // order, which is what MSVC does. + if (!Element) + continue; + if (auto *SP = dyn_cast<DISubprogram>(Element)) { + Info.Methods[SP->getRawName()].push_back(SP); + } else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) { + if (DDTy->getTag() == dwarf::DW_TAG_member) { + collectMemberInfo(Info, DDTy); + } else if (DDTy->getTag() == dwarf::DW_TAG_inheritance) { + Info.Inheritance.push_back(DDTy); + } else if (DDTy->getTag() == dwarf::DW_TAG_friend) { + // Ignore friend members. It appears that MSVC emitted info about + // friends in the past, but modern versions do not. + } + // FIXME: Get Clang to emit function virtual table here and handle it. + } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) { + Info.NestedClasses.push_back(Composite); + } + // Skip other unrecognized kinds of elements. + } + return Info; +} + +TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) { + // First, construct the forward decl. Don't look into Ty to compute the + // forward decl options, since it might not be available in all TUs. + TypeRecordKind Kind = getRecordKind(Ty); + ClassOptions CO = + ClassOptions::ForwardReference | getCommonClassOptions(Ty); + std::string FullName = getFullyQualifiedName(Ty); + TypeIndex FwdDeclTI = TypeTable.writeClass(ClassRecord( + Kind, 0, CO, HfaKind::None, WindowsRTClassKind::None, TypeIndex(), + TypeIndex(), TypeIndex(), 0, FullName, Ty->getIdentifier())); + if (!Ty->isForwardDecl()) + DeferredCompleteTypes.push_back(Ty); + return FwdDeclTI; +} + +TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { + // Construct the field list and complete type record. 
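The field list built here ultimately comes from collectClassInfo and collectMemberInfo above, which hoist the fields of an unnamed nested struct or union into the enclosing record, folding the wrapper's bit offset into each member. A small sketch of just that offset accumulation; Member and hoistNested are illustrative stand-ins for the DI types:

#include <cstdint>
#include <vector>

struct Member {
  const char *Name;
  uint64_t OffsetInBits;
};

// Hoist the members of an unnamed nested record into its parent, folding the
// nested record's own bit offset into each hoisted member.
static void hoistNested(std::vector<Member> &Parent, uint64_t NestedOffset,
                        const std::vector<Member> &Nested) {
  for (const Member &M : Nested)
    Parent.push_back({M.Name, M.OffsetInBits + NestedOffset});
}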
+ TypeRecordKind Kind = getRecordKind(Ty); + ClassOptions CO = getCommonClassOptions(Ty); + TypeIndex FieldTI; + TypeIndex VShapeTI; + unsigned FieldCount; + bool ContainsNestedClass; + std::tie(FieldTI, VShapeTI, FieldCount, ContainsNestedClass) = + lowerRecordFieldList(Ty); + + if (ContainsNestedClass) + CO |= ClassOptions::ContainsNestedClass; + + std::string FullName = getFullyQualifiedName(Ty); + + uint64_t SizeInBytes = Ty->getSizeInBits() / 8; + + TypeIndex ClassTI = TypeTable.writeClass(ClassRecord( + Kind, FieldCount, CO, HfaKind::None, WindowsRTClassKind::None, FieldTI, + TypeIndex(), VShapeTI, SizeInBytes, FullName, Ty->getIdentifier())); + + TypeTable.writeUdtSourceLine(UdtSourceLineRecord( + ClassTI, TypeTable.writeStringId(StringIdRecord( + TypeIndex(0x0), getFullFilepath(Ty->getFile()))), + Ty->getLine())); + + addToUDTs(Ty, ClassTI); + + return ClassTI; +} + +TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) { + ClassOptions CO = + ClassOptions::ForwardReference | getCommonClassOptions(Ty); + std::string FullName = getFullyQualifiedName(Ty); + TypeIndex FwdDeclTI = + TypeTable.writeUnion(UnionRecord(0, CO, HfaKind::None, TypeIndex(), 0, + FullName, Ty->getIdentifier())); + if (!Ty->isForwardDecl()) + DeferredCompleteTypes.push_back(Ty); + return FwdDeclTI; +} + +TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) { + ClassOptions CO = ClassOptions::Sealed | getCommonClassOptions(Ty); + TypeIndex FieldTI; + unsigned FieldCount; + bool ContainsNestedClass; + std::tie(FieldTI, std::ignore, FieldCount, ContainsNestedClass) = + lowerRecordFieldList(Ty); + + if (ContainsNestedClass) + CO |= ClassOptions::ContainsNestedClass; + + uint64_t SizeInBytes = Ty->getSizeInBits() / 8; + std::string FullName = getFullyQualifiedName(Ty); + + TypeIndex UnionTI = TypeTable.writeUnion( + UnionRecord(FieldCount, CO, HfaKind::None, FieldTI, SizeInBytes, FullName, + Ty->getIdentifier())); + + TypeTable.writeUdtSourceLine(UdtSourceLineRecord( + UnionTI, TypeTable.writeStringId(StringIdRecord( + TypeIndex(0x0), getFullFilepath(Ty->getFile()))), + Ty->getLine())); + + addToUDTs(Ty, UnionTI); + + return UnionTI; +} + +std::tuple<TypeIndex, TypeIndex, unsigned, bool> +CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { + // Manually count members. MSVC appears to count everything that generates a + // field list record. Each individual overload in a method overload group + // contributes to this count, even though the overload group is a single field + // list record. + unsigned MemberCount = 0; + ClassInfo Info = collectClassInfo(Ty); + FieldListRecordBuilder Fields; + + // Create base classes. + for (const DIDerivedType *I : Info.Inheritance) { + if (I->getFlags() & DINode::FlagVirtual) { + // Virtual base. + // FIXME: Emit VBPtrOffset when the frontend provides it. + unsigned VBPtrOffset = 0; + // FIXME: Despite the accessor name, the offset is really in bytes. + unsigned VBTableIndex = I->getOffsetInBits() / 4; + Fields.writeVirtualBaseClass(VirtualBaseClassRecord( + translateAccessFlags(Ty->getTag(), I->getFlags()), + getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset, + VBTableIndex)); + } else { + assert(I->getOffsetInBits() % 8 == 0 && + "bases must be on byte boundaries"); + Fields.writeBaseClass(BaseClassRecord( + translateAccessFlags(Ty->getTag(), I->getFlags()), + getTypeIndex(I->getBaseType()), I->getOffsetInBits() / 8)); + } + } + + // Create members. 
+ for (ClassInfo::MemberInfo &MemberInfo : Info.Members) { + const DIDerivedType *Member = MemberInfo.MemberTypeNode; + TypeIndex MemberBaseType = getTypeIndex(Member->getBaseType()); + StringRef MemberName = Member->getName(); + MemberAccess Access = + translateAccessFlags(Ty->getTag(), Member->getFlags()); + + if (Member->isStaticMember()) { + Fields.writeStaticDataMember( + StaticDataMemberRecord(Access, MemberBaseType, MemberName)); + MemberCount++; + continue; + } + + // Data member. + uint64_t MemberOffsetInBits = + Member->getOffsetInBits() + MemberInfo.BaseOffset; + if (Member->isBitField()) { + uint64_t StartBitOffset = MemberOffsetInBits; + if (const auto *CI = + dyn_cast_or_null<ConstantInt>(Member->getStorageOffsetInBits())) { + MemberOffsetInBits = CI->getZExtValue() + MemberInfo.BaseOffset; + } + StartBitOffset -= MemberOffsetInBits; + MemberBaseType = TypeTable.writeBitField(BitFieldRecord( + MemberBaseType, Member->getSizeInBits(), StartBitOffset)); + } + uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8; + Fields.writeDataMember(DataMemberRecord(Access, MemberBaseType, + MemberOffsetInBytes, MemberName)); + MemberCount++; + } + + // Create methods + for (auto &MethodItr : Info.Methods) { + StringRef Name = MethodItr.first->getString(); + + std::vector<OneMethodRecord> Methods; + for (const DISubprogram *SP : MethodItr.second) { + TypeIndex MethodType = getMemberFunctionType(SP, Ty); + bool Introduced = SP->getFlags() & DINode::FlagIntroducedVirtual; + + unsigned VFTableOffset = -1; + if (Introduced) + VFTableOffset = SP->getVirtualIndex() * getPointerSizeInBytes(); + + Methods.push_back( + OneMethodRecord(MethodType, translateMethodKindFlags(SP, Introduced), + translateMethodOptionFlags(SP), + translateAccessFlags(Ty->getTag(), SP->getFlags()), + VFTableOffset, Name)); + MemberCount++; + } + assert(Methods.size() > 0 && "Empty methods map entry"); + if (Methods.size() == 1) + Fields.writeOneMethod(Methods[0]); + else { + TypeIndex MethodList = + TypeTable.writeMethodOverloadList(MethodOverloadListRecord(Methods)); + Fields.writeOverloadedMethod( + OverloadedMethodRecord(Methods.size(), MethodList, Name)); + } + } + + // Create nested classes. + for (const DICompositeType *Nested : Info.NestedClasses) { + NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName()); + Fields.writeNestedType(R); + MemberCount++; + } + + TypeIndex FieldTI = TypeTable.writeFieldList(Fields); + return std::make_tuple(FieldTI, TypeIndex(), MemberCount, + !Info.NestedClasses.empty()); +} + +TypeIndex CodeViewDebug::getVBPTypeIndex() { + if (!VBPType.getIndex()) { + // Make a 'const int *' type. + ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const); + TypeIndex ModifiedTI = TypeTable.writeModifier(MR); + + PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64 + : PointerKind::Near32; + PointerMode PM = PointerMode::Pointer; + PointerOptions PO = PointerOptions::None; + PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes()); + + VBPType = TypeTable.writePointer(PR); + } + + return VBPType; +} + +TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) { + const DIType *Ty = TypeRef.resolve(); + const DIType *ClassTy = ClassTyRef.resolve(); + + // The null DIType is the void type. Don't try to hash it. + if (!Ty) + return TypeIndex::Void(); + + // Check if we've already translated this type. Don't try to do a + // get-or-create style insertion that caches the hash lookup across the + // lowerType call. 
+ // It will update the TypeIndices map.
+ auto I = TypeIndices.find({Ty, ClassTy});
+ if (I != TypeIndices.end())
+ return I->second;
+
+ TypeLoweringScope S(*this);
+ TypeIndex TI = lowerType(Ty, ClassTy);
+ return recordTypeIndexForDINode(Ty, TI, ClassTy);
+}
+
+TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
+ const DIType *Ty = TypeRef.resolve();
+
+ // The null DIType is the void type. Don't try to hash it.
+ if (!Ty)
+ return TypeIndex::Void();
+
+ // If this is a non-record type, the complete type index is the same as the
+ // normal type index. Just call getTypeIndex.
+ switch (Ty->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ case dwarf::DW_TAG_union_type:
+ break;
+ default:
+ return getTypeIndex(Ty);
+ }
+
+ // Check if we've already translated the complete record type. Lowering a
+ // complete type should never trigger lowering another complete type, so we
+ // can reuse the hash table lookup result.
+ const auto *CTy = cast<DICompositeType>(Ty);
+ auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
+ if (!InsertResult.second)
+ return InsertResult.first->second;
+
+ TypeLoweringScope S(*this);
+
+ // Make sure the forward declaration is emitted first. It's unclear if this
+ // is necessary, but MSVC does it, and we should follow suit until we can show
+ // otherwise.
+ TypeIndex FwdDeclTI = getTypeIndex(CTy);
+
+ // Just use the forward decl if we don't have complete type info. This might
+ // happen if the frontend is using modules and expects the complete definition
+ // to be emitted elsewhere.
+ if (CTy->isForwardDecl())
+ return FwdDeclTI;
+
+ TypeIndex TI;
+ switch (CTy->getTag()) {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_structure_type:
+ TI = lowerCompleteTypeClass(CTy);
+ break;
+ case dwarf::DW_TAG_union_type:
+ TI = lowerCompleteTypeUnion(CTy);
+ break;
+ default:
+ llvm_unreachable("not a record");
+ }
+
+ InsertResult.first->second = TI;
+ return TI;
+}
+
+/// Emit all the deferred complete record types. Try to do this in FIFO order,
+/// and do this until fixpoint, as each complete record type typically
+/// references many other record types.
+void CodeViewDebug::emitDeferredCompleteTypes() {
+ SmallVector<const DICompositeType *, 4> TypesToEmit;
+ while (!DeferredCompleteTypes.empty()) {
+ std::swap(DeferredCompleteTypes, TypesToEmit);
+ for (const DICompositeType *RecordTy : TypesToEmit)
+ getCompleteTypeIndex(RecordTy);
+ TypesToEmit.clear();
+ }
+}
+
+void CodeViewDebug::emitLocalVariableList(ArrayRef<LocalVariable> Locals) {
+ // Get the sorted list of parameters and emit them first.
+ SmallVector<const LocalVariable *, 6> Params;
+ for (const LocalVariable &L : Locals)
+ if (L.DIVar->isParameter())
+ Params.push_back(&L);
+ std::sort(Params.begin(), Params.end(),
+ [](const LocalVariable *L, const LocalVariable *R) {
+ return L->DIVar->getArg() < R->DIVar->getArg();
+ });
+ for (const LocalVariable *L : Params)
+ emitLocalVariable(*L);
+
+ // Next emit all non-parameters in the order that we found them.
+ for (const LocalVariable &L : Locals)
+ if (!L.DIVar->isParameter())
+ emitLocalVariable(L);
+}
+
+void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
+ // LocalSym record, see SymbolRecord.h for more info.
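Each symbol record emitted here, including the LocalSym below, is framed by a 2-byte length and a 2-byte kind, where the length counts everything after the length field itself; that is why the patch emits absolute symbol diffs with the begin label placed after the length. A byte-level sketch of that framing, assuming little-endian output; appendSymbolRecord is a hypothetical helper:

#include <cstdint>
#include <vector>

// Frame one symbol record: 2-byte length, then a 2-byte kind plus the body.
// The length counts the kind and body but not the length field itself.
static void appendSymbolRecord(std::vector<uint8_t> &Out, uint16_t Kind,
                               const std::vector<uint8_t> &Body) {
  auto Emit16 = [&Out](uint16_t V) {
    Out.push_back(uint8_t(V));
    Out.push_back(uint8_t(V >> 8)); // little-endian
  };
  Emit16(uint16_t(Body.size() + 2)); // assumes the record fits in 16 bits
  Emit16(Kind);
  Out.insert(Out.end(), Body.begin(), Body.end());
}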
+ MCSymbol *LocalBegin = MMI->getContext().createTempSymbol(), + *LocalEnd = MMI->getContext().createTempSymbol(); + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(LocalEnd, LocalBegin, 2); + OS.EmitLabel(LocalBegin); + + OS.AddComment("Record kind: S_LOCAL"); + OS.EmitIntValue(unsigned(SymbolKind::S_LOCAL), 2); + + LocalSymFlags Flags = LocalSymFlags::None; + if (Var.DIVar->isParameter()) + Flags |= LocalSymFlags::IsParameter; + if (Var.DefRanges.empty()) + Flags |= LocalSymFlags::IsOptimizedOut; + + OS.AddComment("TypeIndex"); + TypeIndex TI = getCompleteTypeIndex(Var.DIVar->getType()); + OS.EmitIntValue(TI.getIndex(), 4); + OS.AddComment("Flags"); + OS.EmitIntValue(static_cast<uint16_t>(Flags), 2); + // Truncate the name so we won't overflow the record length field. + emitNullTerminatedSymbolName(OS, Var.DIVar->getName()); + OS.EmitLabel(LocalEnd); + + // Calculate the on disk prefix of the appropriate def range record. The + // records and on disk formats are described in SymbolRecords.h. BytePrefix + // should be big enough to hold all forms without memory allocation. + SmallString<20> BytePrefix; + for (const LocalVarDefRange &DefRange : Var.DefRanges) { + BytePrefix.clear(); + // FIXME: Handle bitpieces. + if (DefRange.StructOffset != 0) + continue; + + if (DefRange.InMemory) { + DefRangeRegisterRelSym Sym(DefRange.CVRegister, 0, DefRange.DataOffset, 0, + 0, 0, ArrayRef<LocalVariableAddrGap>()); + ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL); + BytePrefix += + StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind)); + BytePrefix += + StringRef(reinterpret_cast<const char *>(&Sym.Header), + sizeof(Sym.Header) - sizeof(LocalVariableAddrRange)); + } else { + assert(DefRange.DataOffset == 0 && "unexpected offset into register"); + // Unclear what matters here. + DefRangeRegisterSym Sym(DefRange.CVRegister, 0, 0, 0, 0, + ArrayRef<LocalVariableAddrGap>()); + ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER); + BytePrefix += + StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind)); + BytePrefix += + StringRef(reinterpret_cast<const char *>(&Sym.Header), + sizeof(Sym.Header) - sizeof(LocalVariableAddrRange)); + } + OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix); + } +} + +void CodeViewDebug::endFunction(const MachineFunction *MF) { + if (!Asm || !CurFn) // We haven't created any debug info for this function. + return; + + const Function *GV = MF->getFunction(); + assert(FnDebugInfo.count(GV)); + assert(CurFn == &FnDebugInfo[GV]); + + collectVariableInfo(GV->getSubprogram()); + + DebugHandlerBase::endFunction(MF); + + // Don't emit anything if we don't have any line tables. + if (!CurFn->HaveLineInfo) { + FnDebugInfo.erase(GV); + CurFn = nullptr; + return; + } + + CurFn->End = Asm->getFunctionEnd(); + + CurFn = nullptr; +} + +void CodeViewDebug::beginInstruction(const MachineInstr *MI) { + DebugHandlerBase::beginInstruction(MI); + + // Ignore DBG_VALUE locations and function prologue. 
+ if (!Asm || !CurFn || MI->isDebugValue() || + MI->getFlag(MachineInstr::FrameSetup)) + return; + DebugLoc DL = MI->getDebugLoc(); + if (DL == PrevInstLoc || !DL) + return; + maybeRecordLocation(DL, Asm->MF); +} + +MCSymbol *CodeViewDebug::beginCVSubsection(ModuleSubstreamKind Kind) { + MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(), + *EndLabel = MMI->getContext().createTempSymbol(); + OS.EmitIntValue(unsigned(Kind), 4); + OS.AddComment("Subsection size"); + OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4); + OS.EmitLabel(BeginLabel); + return EndLabel; +} + +void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) { + OS.EmitLabel(EndLabel); + // Every subsection must be aligned to a 4-byte boundary. + OS.EmitValueToAlignment(4); +} + +void CodeViewDebug::emitDebugInfoForUDTs( + ArrayRef<std::pair<std::string, TypeIndex>> UDTs) { + for (const std::pair<std::string, codeview::TypeIndex> &UDT : UDTs) { + MCSymbol *UDTRecordBegin = MMI->getContext().createTempSymbol(), + *UDTRecordEnd = MMI->getContext().createTempSymbol(); + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(UDTRecordEnd, UDTRecordBegin, 2); + OS.EmitLabel(UDTRecordBegin); + + OS.AddComment("Record kind: S_UDT"); + OS.EmitIntValue(unsigned(SymbolKind::S_UDT), 2); + + OS.AddComment("Type"); + OS.EmitIntValue(UDT.second.getIndex(), 4); + + emitNullTerminatedSymbolName(OS, UDT.first); + OS.EmitLabel(UDTRecordEnd); + } +} + +void CodeViewDebug::emitDebugInfoForGlobals() { + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + for (const MDNode *Node : CUs->operands()) { + const auto *CU = cast<DICompileUnit>(Node); + + // First, emit all globals that are not in a comdat in a single symbol + // substream. MSVC doesn't like it if the substream is empty, so only open + // it if we have at least one global to emit. + switchToDebugSectionForSymbol(nullptr); + MCSymbol *EndLabel = nullptr; + for (const DIGlobalVariable *G : CU->getGlobalVariables()) { + if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) { + if (!GV->hasComdat() && !GV->isDeclarationForLinker()) { + if (!EndLabel) { + OS.AddComment("Symbol subsection for globals"); + EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); + } + emitDebugInfoForGlobal(G, Asm->getSymbol(GV)); + } + } + } + if (EndLabel) + endCVSubsection(EndLabel); + + // Second, emit each global that is in a comdat into its own .debug$S + // section along with its own symbol substream. + for (const DIGlobalVariable *G : CU->getGlobalVariables()) { + if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) { + if (GV->hasComdat()) { + MCSymbol *GVSym = Asm->getSymbol(GV); + OS.AddComment("Symbol subsection for " + + Twine(GlobalValue::getRealLinkageName(GV->getName()))); + switchToDebugSectionForSymbol(GVSym); + EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); + emitDebugInfoForGlobal(G, GVSym); + endCVSubsection(EndLabel); + } + } + } + } +} + +void CodeViewDebug::emitDebugInfoForRetainedTypes() { + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + for (const MDNode *Node : CUs->operands()) { + for (auto *Ty : cast<DICompileUnit>(Node)->getRetainedTypes()) { + if (DIType *RT = dyn_cast<DIType>(Ty)) { + getTypeIndex(RT); + // FIXME: Add to global/local DTU list. + } + } + } +} + +void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, + MCSymbol *GVSym) { + // DataSym record, see SymbolRecord.h for more info. 
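+  // As the emission sequence below suggests, the on-disk DataSym layout is
+  // roughly:
+  //   uint16_t RecordLen;  // record length, excluding this field
+  //   uint16_t RecordKind; // S_{L,G}DATA32 or S_{L,G}THREAD32
+  //   uint32_t TypeIndex;  // complete type of the global
+  //   uint32_t DataOffset; // section-relative address (SECREL relocation)
+  //   uint16_t Segment;    // COFF section index
+  //   char     Name[];     // null-terminated
+  // (Field names here are illustrative; see SymbolRecord.h for the real
+  // definitions.)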
+ // FIXME: Thread local data, etc + MCSymbol *DataBegin = MMI->getContext().createTempSymbol(), + *DataEnd = MMI->getContext().createTempSymbol(); + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2); + OS.EmitLabel(DataBegin); + const auto *GV = cast<GlobalVariable>(DIGV->getVariable()); + if (DIGV->isLocalToUnit()) { + if (GV->isThreadLocal()) { + OS.AddComment("Record kind: S_LTHREAD32"); + OS.EmitIntValue(unsigned(SymbolKind::S_LTHREAD32), 2); + } else { + OS.AddComment("Record kind: S_LDATA32"); + OS.EmitIntValue(unsigned(SymbolKind::S_LDATA32), 2); + } + } else { + if (GV->isThreadLocal()) { + OS.AddComment("Record kind: S_GTHREAD32"); + OS.EmitIntValue(unsigned(SymbolKind::S_GTHREAD32), 2); + } else { + OS.AddComment("Record kind: S_GDATA32"); + OS.EmitIntValue(unsigned(SymbolKind::S_GDATA32), 2); + } + } + OS.AddComment("Type"); + OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4); + OS.AddComment("DataOffset"); + OS.EmitCOFFSecRel32(GVSym); + OS.AddComment("Segment"); + OS.EmitCOFFSectionIndex(GVSym); + OS.AddComment("Name"); + emitNullTerminatedSymbolName(OS, DIGV->getName()); + OS.EmitLabel(DataEnd); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h new file mode 100644 index 0000000..e4bbd61 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -0,0 +1,310 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Microsoft CodeView debug info. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H + +#include "DebugHandlerBase.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { + +class StringRef; +class LexicalScope; +struct ClassInfo; + +/// \brief Collects and handles line tables information in a CodeView format. +class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { + MCStreamer &OS; + codeview::MemoryTypeTableBuilder TypeTable; + + /// Represents the most general definition range. + struct LocalVarDefRange { + /// Indicates that variable data is stored in memory relative to the + /// specified register. + int InMemory : 1; + + /// Offset of variable data in memory. + int DataOffset : 31; + + /// Offset of the data into the user level struct. If zero, no splitting + /// occurred. + uint16_t StructOffset; + + /// Register containing the data or the register base of the memory + /// location containing the data. + uint16_t CVRegister; + + /// Compares all location fields. This includes all fields except the label + /// ranges. 
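+    /// Presumably, two def ranges whose location fields all match can be
+    /// coalesced by extending Ranges rather than emitting a second record.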
+    bool isDifferentLocation(LocalVarDefRange &O) {
+      return InMemory != O.InMemory || DataOffset != O.DataOffset ||
+             StructOffset != O.StructOffset || CVRegister != O.CVRegister;
+    }
+
+    SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges;
+  };
+
+  static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset);
+  static LocalVarDefRange createDefRangeReg(uint16_t CVRegister);
+
+  /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
+  struct LocalVariable {
+    const DILocalVariable *DIVar = nullptr;
+    SmallVector<LocalVarDefRange, 1> DefRanges;
+  };
+
+  struct InlineSite {
+    SmallVector<LocalVariable, 1> InlinedLocals;
+    SmallVector<const DILocation *, 1> ChildSites;
+    const DISubprogram *Inlinee = nullptr;
+
+    /// The ID of the inline site or function used with .cv_loc. Not a type
+    /// index.
+    unsigned SiteFuncId = 0;
+  };
+
+  // For each function, store a vector of labels to its instructions, as well
+  // as to the end of the function.
+  struct FunctionInfo {
+    /// Map from inlined call site to inlined instructions and child inlined
+    /// call sites. Listed in program order.
+    std::unordered_map<const DILocation *, InlineSite> InlineSites;
+
+    /// Ordered list of top-level inlined call sites.
+    SmallVector<const DILocation *, 1> ChildSites;
+
+    SmallVector<LocalVariable, 1> Locals;
+
+    DebugLoc LastLoc;
+    const MCSymbol *Begin = nullptr;
+    const MCSymbol *End = nullptr;
+    unsigned FuncId = 0;
+    unsigned LastFileId = 0;
+    bool HaveLineInfo = false;
+  };
+  FunctionInfo *CurFn;
+
+  /// The set of comdat .debug$S sections that we've seen so far. Each section
+  /// must start with a magic version number that must only be emitted once.
+  /// This set tracks which sections we've already opened.
+  DenseSet<MCSectionCOFF *> ComdatDebugSections;
+
+  /// Switch to the appropriate .debug$S section for GVSym. If GVSym, the
+  /// symbol of an emitted global value, is in a comdat COFF section, this
+  /// will switch to a new .debug$S section in that comdat. This method
+  /// ensures that the section starts with the magic version number on first
+  /// use. If GVSym is null, uses the main .debug$S section.
+  void switchToDebugSectionForSymbol(const MCSymbol *GVSym);
+
+  /// The next available function index for use with our .cv_* directives. Not
+  /// to be confused with type indices for LF_FUNC_ID records.
+  unsigned NextFuncId = 0;
+
+  InlineSite &getInlineSite(const DILocation *InlinedAt,
+                            const DISubprogram *Inlinee);
+
+  codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP);
+
+  static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children,
+                                        const FunctionInfo &FI,
+                                        const InlineSite &Site);
+
+  /// Remember some debug info about each function. Keep it in a stable order
+  /// to emit at the end of the TU.
+  MapVector<const Function *, FunctionInfo> FnDebugInfo;
+
+  /// Map from DIFile to .cv_file id.
+  DenseMap<const DIFile *, unsigned> FileIdMap;
+
+  /// All inlined subprograms in the order they should be emitted.
+  SmallSetVector<const DISubprogram *, 4> InlinedSubprograms;
+
+  /// Map from a pair of a DI metadata node and an optional DI type (or
+  /// scope), which may be nullptr, to CodeView type indices. Primarily
+  /// indexed by {DIType*, DIType*} and {DISubprogram*, DIType*}.
+  ///
+  /// The second entry in the key is needed for methods, as the
+  /// DISubroutineType representing a static method type is shared with the
+  /// equivalent non-method function type.
+ DenseMap<std::pair<const DINode *, const DIType *>, codeview::TypeIndex> + TypeIndices; + + /// Map from DICompositeType* to complete type index. Non-record types are + /// always looked up in the normal TypeIndices map. + DenseMap<const DICompositeType *, codeview::TypeIndex> CompleteTypeIndices; + + /// Complete record types to emit after all active type lowerings are + /// finished. + SmallVector<const DICompositeType *, 4> DeferredCompleteTypes; + + /// Number of type lowering frames active on the stack. + unsigned TypeEmissionLevel = 0; + + codeview::TypeIndex VBPType; + + const DISubprogram *CurrentSubprogram = nullptr; + + // The UDTs we have seen while processing types; each entry is a pair of type + // index and type name. + std::vector<std::pair<std::string, codeview::TypeIndex>> LocalUDTs, + GlobalUDTs; + + typedef std::map<const DIFile *, std::string> FileToFilepathMapTy; + FileToFilepathMapTy FileToFilepathMap; + StringRef getFullFilepath(const DIFile *S); + + unsigned maybeRecordFile(const DIFile *F); + + void maybeRecordLocation(const DebugLoc &DL, const MachineFunction *MF); + + void clear(); + + void setCurrentSubprogram(const DISubprogram *SP) { + CurrentSubprogram = SP; + LocalUDTs.clear(); + } + + /// Emit the magic version number at the start of a CodeView type or symbol + /// section. Appears at the front of every .debug$S or .debug$T section. + void emitCodeViewMagicVersion(); + + void emitTypeInformation(); + + void emitInlineeLinesSubsection(); + + void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI); + + void emitDebugInfoForGlobals(); + + void emitDebugInfoForRetainedTypes(); + + void emitDebugInfoForUDTs( + ArrayRef<std::pair<std::string, codeview::TypeIndex>> UDTs); + + void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, MCSymbol *GVSym); + + /// Opens a subsection of the given kind in a .debug$S codeview section. + /// Returns an end label for use with endCVSubsection when the subsection is + /// finished. + MCSymbol *beginCVSubsection(codeview::ModuleSubstreamKind Kind); + + void endCVSubsection(MCSymbol *EndLabel); + + void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt, + const InlineSite &Site); + + typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + + void collectVariableInfo(const DISubprogram *SP); + + void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &Processed); + + /// Records information about a local variable in the appropriate scope. In + /// particular, locals from inlined code live inside the inlining site. + void recordLocalVariable(LocalVariable &&Var, const DILocation *Loc); + + /// Emits local variables in the appropriate order. + void emitLocalVariableList(ArrayRef<LocalVariable> Locals); + + /// Emits an S_LOCAL record and its associated defined ranges. + void emitLocalVariable(const LocalVariable &Var); + + /// Translates the DIType to codeview if necessary and returns a type index + /// for it. 
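+  /// Lookups are memoized in TypeIndices; on a miss, lowerType runs inside a
+  /// TypeLoweringScope, and complete record types referenced during lowering
+  /// are deferred until all active lowerings finish.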
+ codeview::TypeIndex getTypeIndex(DITypeRef TypeRef, + DITypeRef ClassTyRef = DITypeRef()); + + codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP, + const DICompositeType *Class); + + codeview::TypeIndex getScopeIndex(const DIScope *Scope); + + codeview::TypeIndex getVBPTypeIndex(); + + void addToUDTs(const DIType *Ty, codeview::TypeIndex TI); + + codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy); + codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty); + codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty); + codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty); + codeview::TypeIndex lowerTypePointer(const DIDerivedType *Ty); + codeview::TypeIndex lowerTypeMemberPointer(const DIDerivedType *Ty); + codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty); + codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty); + codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty, + const DIType *ClassTy, + int ThisAdjustment); + codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty); + codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty); + codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty); + + /// Symbol records should point to complete types, but type records should + /// always point to incomplete types to avoid cycles in the type graph. Only + /// use this entry point when generating symbol records. The complete and + /// incomplete type indices only differ for record types. All other types use + /// the same index. + codeview::TypeIndex getCompleteTypeIndex(DITypeRef TypeRef); + + codeview::TypeIndex lowerCompleteTypeClass(const DICompositeType *Ty); + codeview::TypeIndex lowerCompleteTypeUnion(const DICompositeType *Ty); + + struct TypeLoweringScope; + + void emitDeferredCompleteTypes(); + + void collectMemberInfo(ClassInfo &Info, const DIDerivedType *DDTy); + ClassInfo collectClassInfo(const DICompositeType *Ty); + + /// Common record member lowering functionality for record types, which are + /// structs, classes, and unions. Returns the field list index and the member + /// count. + std::tuple<codeview::TypeIndex, codeview::TypeIndex, unsigned, bool> + lowerRecordFieldList(const DICompositeType *Ty); + + /// Inserts {{Node, ClassTy}, TI} into TypeIndices and checks for duplicates. + codeview::TypeIndex recordTypeIndexForDINode(const DINode *Node, + codeview::TypeIndex TI, + const DIType *ClassTy = nullptr); + + unsigned getPointerSizeInBytes(); + +public: + CodeViewDebug(AsmPrinter *Asm); + + void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} + + /// \brief Emit the COFF section that holds the line table information. + void endModule() override; + + /// \brief Gather pre-function debug information. + void beginFunction(const MachineFunction *MF) override; + + /// \brief Gather post-function debug information. + void endFunction(const MachineFunction *) override; + + /// \brief Process beginning of an instruction. 
+ void beginInstruction(const MachineInstr *MI) override; +}; +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index 7b0cdbd..2aaa85a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -32,39 +32,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// EmittingAsmStreamer Implementation -//===----------------------------------------------------------------------===// -unsigned EmittingAsmStreamer::emitULEB128(uint64_t Value, const char *Desc, - unsigned PadTo) { - AP->EmitULEB128(Value, Desc, PadTo); - return 0; -} - -unsigned EmittingAsmStreamer::emitInt8(unsigned char Value) { - AP->EmitInt8(Value); - return 0; -} - -unsigned EmittingAsmStreamer::emitBytes(StringRef Data) { - AP->OutStreamer->EmitBytes(Data); - return 0; -} - -//===----------------------------------------------------------------------===// -// SizeReporterAsmStreamer Implementation -//===----------------------------------------------------------------------===// -unsigned SizeReporterAsmStreamer::emitULEB128(uint64_t Value, const char *Desc, - unsigned PadTo) { - return getULEB128Size(Value); -} - -unsigned SizeReporterAsmStreamer::emitInt8(unsigned char Value) { return 1; } - -unsigned SizeReporterAsmStreamer::emitBytes(StringRef Data) { - return Data.size(); -} - -//===----------------------------------------------------------------------===// // DIEAbbrevData Implementation //===----------------------------------------------------------------------===// @@ -512,20 +479,6 @@ void DIEEntry::print(raw_ostream &O) const { } //===----------------------------------------------------------------------===// -// DIETypeSignature Implementation -//===----------------------------------------------------------------------===// -void DIETypeSignature::EmitValue(const AsmPrinter *Asm, - dwarf::Form Form) const { - assert(Form == dwarf::DW_FORM_ref_sig8); - Asm->OutStreamer->EmitIntValue(Unit->getTypeSignature(), 8); -} - -LLVM_DUMP_METHOD -void DIETypeSignature::print(raw_ostream &O) const { - O << format("Type Unit: 0x%lx", Unit->getTypeSignature()); -} - -//===----------------------------------------------------------------------===// // DIELoc Implementation //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 0201065..74c47d1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -279,7 +279,7 @@ void DIEHash::hashLocList(const DIELocList &LocList) { // Hash an individual attribute \param Attr based on the type of attribute and // the form. 
-void DIEHash::hashAttribute(DIEValue Value, dwarf::Tag Tag) { +void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { dwarf::Attribute Attribute = Value.getAttribute(); // Other attribute values use the letter 'A' as the marker, and the value @@ -353,7 +353,6 @@ void DIEHash::hashAttribute(DIEValue Value, dwarf::Tag Tag) { case DIEValue::isExpr: case DIEValue::isLabel: case DIEValue::isDelta: - case DIEValue::isTypeSignature: llvm_unreachable("Add support for additional value types."); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h index 44f0ce8..996cd7e 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -131,7 +131,7 @@ private: void hashLocList(const DIELocList &LocList); /// \brief Hashes an individual attribute. - void hashAttribute(DIEValue Value, dwarf::Tag Tag); + void hashAttribute(const DIEValue &Value, dwarf::Tag Tag); /// \brief Hashes an attribute that refers to another DIE. void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 3c46a99..adc536f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -15,7 +15,9 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <map> using namespace llvm; @@ -40,7 +42,7 @@ void DbgValueHistoryMap::startInstrRange(InlinedVariable Var, assert(MI.isDebugValue() && "not a DBG_VALUE"); auto &Ranges = VarInstrRanges[Var]; if (!Ranges.empty() && Ranges.back().second == nullptr && - Ranges.back().first->isIdenticalTo(&MI)) { + Ranges.back().first->isIdenticalTo(MI)) { DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" << "\t" << Ranges.back().first << "\t" << MI << "\n"); return; @@ -122,26 +124,6 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr); } -// \brief Collect all registers clobbered by @MI and apply the functor -// @Func to their RegNo. -// @Func should be a functor with a void(unsigned) signature. We're -// not using std::function here for performance reasons. It has a -// small but measurable impact. By using a functor instead of a -// std::set& here, we can avoid the overhead of constructing -// temporaries in calculateDbgValueHistory, which has a significant -// performance impact. -template<typename Callable> -static void applyToClobberedRegisters(const MachineInstr &MI, - const TargetRegisterInfo *TRI, - Callable Func) { - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef() || !MO.getReg()) - continue; - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) - Func(*AI); - } -} - // \brief Returns the first instruction in @MBB which corresponds to // the function epilogue, or nullptr if @MBB doesn't contain an epilogue. 
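// The implementation below walks the block in reverse: the trailing run of
// instructions that share a single debug location is treated as the
// epilogue, and if every instruction shares that location, the whole block
// is.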
static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { @@ -156,12 +138,12 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { E = MBB.rend(); I != E; ++I) { if (I->getDebugLoc() != LastLoc) - return Res; + return &*Res; Res = &*I; } // If all instructions have the same debug location, assume whole MBB is // an epilogue. - return MBB.begin(); + return &*MBB.begin(); } // \brief Collect registers that are modified in the function body (their @@ -173,10 +155,23 @@ static void collectChangingRegs(const MachineFunction *MF, auto FirstEpilogueInst = getFirstEpilogueInst(MBB); for (const auto &MI : MBB) { + // Avoid looking at prologue or epilogue instructions. if (&MI == FirstEpilogueInst) break; - if (!MI.getFlag(MachineInstr::FrameSetup)) - applyToClobberedRegisters(MI, TRI, [&](unsigned r) { Regs.set(r); }); + if (MI.getFlag(MachineInstr::FrameSetup)) + continue; + + // Look for register defs and register masks. Register masks are + // typically on calls and they clobber everything not in the mask. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg()) { + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); + ++AI) + Regs.set(*AI); + } else if (MO.isRegMask()) { + Regs.setBitsNotInMask(MO.getRegMask()); + } + } } } } @@ -187,16 +182,35 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, BitVector ChangingRegs(TRI->getNumRegs()); collectChangingRegs(MF, TRI, ChangingRegs); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); RegDescribedVarsMap RegVars; for (const auto &MBB : *MF) { for (const auto &MI : MBB) { if (!MI.isDebugValue()) { // Not a DBG_VALUE instruction. It may clobber registers which describe // some variables. - applyToClobberedRegisters(MI, TRI, [&](unsigned RegNo) { - if (ChangingRegs.test(RegNo)) - clobberRegisterUses(RegVars, RegNo, Result, MI); - }); + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg()) { + // If this is a register def operand, it may end a debug value + // range. + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); + ++AI) + if (ChangingRegs.test(*AI)) + clobberRegisterUses(RegVars, *AI, Result, MI); + } else if (MO.isRegMask()) { + // If this is a register mask operand, clobber all debug values in + // non-CSRs. + for (int I = ChangingRegs.find_first(); I != -1; + I = ChangingRegs.find_next(I)) { + // Don't consider SP to be clobbered by register masks. 
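+                // Variable locations described relative to the stack pointer
+                // stay valid across calls, which is presumably why SP is
+                // exempted even when a call's regmask mentions it.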
+ if (unsigned(I) != SP && TRI->isPhysicalRegister(I) && + MO.clobbersPhysReg(I)) { + clobberRegisterUses(RegVars, I, Result, MI); + } + } + } + } continue; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index 546d1b4..16d2d7f 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -12,13 +12,12 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugInfoMetadata.h" namespace llvm { class MachineFunction; class MachineInstr; -class DILocalVariable; -class DILocation; class TargetRegisterInfo; // For each user variable, keep a list of instruction ranges where this variable diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp new file mode 100644 index 0000000..16ffe2e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -0,0 +1,230 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common functionality for different debug information format backends. +// LLVM currently supports DWARF and CodeView. +// +//===----------------------------------------------------------------------===// + +#include "DebugHandlerBase.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} + +// Each LexicalScope has first instruction and last instruction to mark +// beginning and end of a scope respectively. Create an inverse map that list +// scopes starts (and ends) with an instruction. One instruction may start (or +// end) multiple scopes. Ignore scopes that are not reachable. +void DebugHandlerBase::identifyScopeMarkers() { + SmallVector<LexicalScope *, 4> WorkList; + WorkList.push_back(LScopes.getCurrentFunctionScope()); + while (!WorkList.empty()) { + LexicalScope *S = WorkList.pop_back_val(); + + const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); + if (!Children.empty()) + WorkList.append(Children.begin(), Children.end()); + + if (S->isAbstractScope()) + continue; + + for (const InsnRange &R : S->getRanges()) { + assert(R.first && "InsnRange does not have first instruction!"); + assert(R.second && "InsnRange does not have second instruction!"); + requestLabelBeforeInsn(R.first); + requestLabelAfterInsn(R.second); + } + } +} + +// Return Label preceding the instruction. +MCSymbol *DebugHandlerBase::getLabelBeforeInsn(const MachineInstr *MI) { + MCSymbol *Label = LabelsBeforeInsn.lookup(MI); + assert(Label && "Didn't insert label before instruction"); + return Label; +} + +// Return Label immediately following the instruction. +MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) { + return LabelsAfterInsn.lookup(MI); +} + +// Determine the relative position of the pieces described by P1 and P2. 
+// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, +// 1 if P1 is entirely after P2. +int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) { + unsigned l1 = P1->getBitPieceOffset(); + unsigned l2 = P2->getBitPieceOffset(); + unsigned r1 = l1 + P1->getBitPieceSize(); + unsigned r2 = l2 + P2->getBitPieceSize(); + if (r1 <= l2) + return -1; + else if (r2 <= l1) + return 1; + else + return 0; +} + +/// Determine whether two variable pieces overlap. +bool DebugHandlerBase::piecesOverlap(const DIExpression *P1, const DIExpression *P2) { + if (!P1->isBitPiece() || !P2->isBitPiece()) + return true; + return pieceCmp(P1, P2) == 0; +} + +/// If this type is derived from a base type then return base type size. +uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { + DIType *Ty = TyRef.resolve(); + assert(Ty); + DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty); + if (!DDTy) + return Ty->getSizeInBits(); + + unsigned Tag = DDTy->getTag(); + + if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && + Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type) + return DDTy->getSizeInBits(); + + DIType *BaseType = DDTy->getBaseType().resolve(); + + assert(BaseType && "Unexpected invalid base type"); + + // If this is a derived type, go ahead and get the base type, unless it's a + // reference then it's just the size of the field. Pointer types have no need + // of this since they're a different type of qualification on the type. + if (BaseType->getTag() == dwarf::DW_TAG_reference_type || + BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty->getSizeInBits(); + + return getBaseTypeSize(BaseType); +} + +void DebugHandlerBase::beginFunction(const MachineFunction *MF) { + // Grab the lexical scopes for the function, if we don't have any of those + // then we're not going to be able to do anything. + LScopes.initialize(*MF); + if (LScopes.empty()) + return; + + // Make sure that each lexical scope will have a begin/end label. + identifyScopeMarkers(); + + // Calculate history for local variables. + assert(DbgValues.empty() && "DbgValues map wasn't cleaned!"); + calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), + DbgValues); + + // Request labels for the full history. + for (const auto &I : DbgValues) { + const auto &Ranges = I.second; + if (Ranges.empty()) + continue; + + // The first mention of a function argument gets the CurrentFnBegin + // label, so arguments are visible when breaking at function entry. + const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable(); + if (DIVar->isParameter() && + getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { + LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); + if (Ranges.front().first->getDebugExpression()->isBitPiece()) { + // Mark all non-overlapping initial pieces. 
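+        // A piece keeps the function-begin label only while no earlier range
+        // overlaps it; the first overlap ends this initial prefix.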
+ for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { + const DIExpression *Piece = I->first->getDebugExpression(); + if (std::all_of(Ranges.begin(), I, + [&](DbgValueHistoryMap::InstrRange Pred) { + return !piecesOverlap(Piece, Pred.first->getDebugExpression()); + })) + LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); + else + break; + } + } + } + + for (const auto &Range : Ranges) { + requestLabelBeforeInsn(Range.first); + if (Range.second) + requestLabelAfterInsn(Range.second); + } + } + + PrevInstLoc = DebugLoc(); + PrevLabel = Asm->getFunctionBegin(); +} + +void DebugHandlerBase::beginInstruction(const MachineInstr *MI) { + if (!MMI->hasDebugInfo()) + return; + + assert(CurMI == nullptr); + CurMI = MI; + + // Insert labels where requested. + DenseMap<const MachineInstr *, MCSymbol *>::iterator I = + LabelsBeforeInsn.find(MI); + + // No label needed. + if (I == LabelsBeforeInsn.end()) + return; + + // Label already assigned. + if (I->second) + return; + + if (!PrevLabel) { + PrevLabel = MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(PrevLabel); + } + I->second = PrevLabel; +} + +void DebugHandlerBase::endInstruction() { + if (!MMI->hasDebugInfo()) + return; + + assert(CurMI != nullptr); + // Don't create a new label after DBG_VALUE instructions. + // They don't generate code. + if (!CurMI->isDebugValue()) + PrevLabel = nullptr; + + DenseMap<const MachineInstr *, MCSymbol *>::iterator I = + LabelsAfterInsn.find(CurMI); + CurMI = nullptr; + + // No label needed. + if (I == LabelsAfterInsn.end()) + return; + + // Label already assigned. + if (I->second) + return; + + // We need a label after this instruction. + if (!PrevLabel) { + PrevLabel = MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(PrevLabel); + } + I->second = PrevLabel; +} + +void DebugHandlerBase::endFunction(const MachineFunction *MF) { + DbgValues.clear(); + LabelsBeforeInsn.clear(); + LabelsAfterInsn.clear(); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h new file mode 100644 index 0000000..b8bbcec --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h @@ -0,0 +1,109 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common functionality for different debug information format backends. +// LLVM currently supports DWARF and CodeView. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGHANDLERBASE_H + +#include "AsmPrinterHandler.h" +#include "DbgValueHistoryCalculator.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineInstr.h" + +namespace llvm { + +class AsmPrinter; +class MachineModuleInfo; + +/// Base class for debug information backends. Common functionality related to +/// tracking which variables and scopes are alive at a given PC live here. +class DebugHandlerBase : public AsmPrinterHandler { +protected: + DebugHandlerBase(AsmPrinter *A); + + /// Target of debug info emission. + AsmPrinter *Asm; + + /// Collected machine module information. + MachineModuleInfo *MMI; + + /// Previous instruction's location information. 
This is used to
+  /// determine label location to indicate scope boundaries in DWARF
+  /// debug info.
+  DebugLoc PrevInstLoc;
+  MCSymbol *PrevLabel = nullptr;
+
+  /// This location indicates end of function prologue and beginning of
+  /// function body.
+  DebugLoc PrologEndLoc;
+
+  /// If nonnull, stores the current machine instruction we're processing.
+  const MachineInstr *CurMI = nullptr;
+
+  LexicalScopes LScopes;
+
+  /// History of DBG_VALUE and clobber instructions for each user
+  /// variable. Variables are listed in order of appearance.
+  DbgValueHistoryMap DbgValues;
+
+  /// Maps each instruction to the label emitted before it.
+  /// FIXME: Make this private from DwarfDebug, we have the necessary
+  /// accessors for it.
+  DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+  /// Maps each instruction to the label emitted after it.
+  DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+  /// Identify instructions that mark the beginning or
+  /// ending of a scope.
+  void identifyScopeMarkers();
+
+  /// Ensure that a label will be emitted before MI.
+  void requestLabelBeforeInsn(const MachineInstr *MI) {
+    LabelsBeforeInsn.insert(std::make_pair(MI, nullptr));
+  }
+
+  /// Ensure that a label will be emitted after MI.
+  void requestLabelAfterInsn(const MachineInstr *MI) {
+    LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
+  }
+
+  // AsmPrinterHandler overrides.
+public:
+  void beginInstruction(const MachineInstr *MI) override;
+  void endInstruction() override;
+
+  void beginFunction(const MachineFunction *MF) override;
+  void endFunction(const MachineFunction *MF) override;
+
+  /// Return Label preceding the instruction.
+  MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+  /// Return Label immediately following the instruction.
+  MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+  /// Determine the relative position of the pieces described by P1 and P2.
+  /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
+  /// 1 if P1 is entirely after P2.
+  static int pieceCmp(const DIExpression *P1, const DIExpression *P2);
+
+  /// Determine whether two variable pieces overlap.
+  static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2);
+
+  /// If this type is derived from a base type then return base type size.
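+  /// Members, typedefs, and const/volatile/restrict qualifiers are looked
+  /// through recursively; if the underlying base type is a reference, the
+  /// walk stops and the derived type's own size is used.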
+ static uint64_t getBaseTypeSize(const DITypeRef TyRef); +}; + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index b60ab91..20acd45 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -11,11 +11,11 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H #include "DebugLocStream.h" -#include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Debug.h" namespace llvm { class AsmPrinter; @@ -76,6 +76,20 @@ public: const DIExpression *getExpression() const { return Expression; } friend bool operator==(const Value &, const Value &); friend bool operator<(const Value &, const Value &); + void dump() const { + if (isLocation()) { + llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " "; + if (Loc.isIndirect()) + llvm::dbgs() << '+' << Loc.getOffset(); + llvm::dbgs() << "} "; + } + else if (isConstantInt()) + Constant.CIP->dump(); + else if (isConstantFP()) + Constant.CFP->dump(); + if (Expression) + Expression->dump(); + } }; private: diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 6665c16..2eae1b2 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -43,8 +42,7 @@ DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) : EHStreamer(A), shouldEmitCFI(false) {} void DwarfCFIExceptionBase::markFunctionEnd() { - if (shouldEmitCFI) - Asm->OutStreamer->EmitCFIEndProc(); + endFragment(); if (MMI->getLandingPads().empty()) return; @@ -53,23 +51,28 @@ void DwarfCFIExceptionBase::markFunctionEnd() { MMI->TidyLandingPads(); } +void DwarfCFIExceptionBase::endFragment() { + if (shouldEmitCFI) + Asm->OutStreamer->EmitCFIEndProc(); +} + DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfCFIExceptionBase(A), shouldEmitPersonality(false), - shouldEmitLSDA(false), shouldEmitMoves(false), - moveTypeModule(AsmPrinter::CFI_M_None) {} + forceEmitPersonality(false), shouldEmitLSDA(false), + shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {} DwarfCFIException::~DwarfCFIException() {} /// endModule - Emit all exception information that should come after the /// content. void DwarfCFIException::endModule() { - if (moveTypeModule == AsmPrinter::CFI_M_Debug) - Asm->OutStreamer->EmitCFISections(false, true); - // SjLj uses this pass and it doesn't need this info. 
if (!Asm->MAI->usesCFIForEH()) return; + if (moveTypeModule == AsmPrinter::CFI_M_Debug) + Asm->OutStreamer->EmitCFISections(false, true); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); @@ -86,6 +89,10 @@ void DwarfCFIException::endModule() { } } +static MCSymbol *getExceptionSym(AsmPrinter *Asm) { + return Asm->getCurExceptionSym(); +} + void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; const Function *F = MF->getFunction(); @@ -109,7 +116,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); // Emit a personality function even when there are no landing pads - bool forceEmitPersonality = + forceEmitPersonality = // ...if a personality function is explicitly specified F->hasPersonalityFn() && // ... and it's not known to be a noop in the absence of invokes @@ -126,7 +133,13 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - shouldEmitCFI = shouldEmitPersonality || shouldEmitMoves; + shouldEmitCFI = MF->getMMI().getContext().getAsmInfo()->usesCFIForEH() && + (shouldEmitPersonality || shouldEmitMoves); + beginFragment(&*MF->begin(), getExceptionSym); +} + +void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, + ExceptionSymbolProvider ESP) { if (!shouldEmitCFI) return; @@ -136,20 +149,24 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { if (!shouldEmitPersonality) return; + auto *F = MBB->getParent()->getFunction(); + auto *P = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + assert(P && "Expected personality function"); + // If we are forced to emit this personality, make sure to record // it because it might not appear in any landingpad if (forceEmitPersonality) - MMI->addPersonality(Per); + MMI->addPersonality(P); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); const MCSymbol *Sym = - TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); + TLOF.getCFIPersonalitySymbol(P, *Asm->Mang, Asm->TM, MMI); Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding); // Provide LSDA information. - if (!shouldEmitLSDA) - return; - - Asm->OutStreamer->EmitCFILsda(Asm->getCurExceptionSym(), LSDAEncoding); + if (shouldEmitLSDA) + Asm->OutStreamer->EmitCFILsda(ESP(Asm), TLOF.getLSDAEncoding()); } /// endFunction - Gather and emit post-function exception information. 
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 725063a..7822814c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -19,9 +19,10 @@ namespace llvm { DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) - : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), + : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID), Skeleton(nullptr), BaseAddress(nullptr) { insertDIE(Node, &getUnitDie()); + MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin"); } /// addLabelAddress - Add a dwarf label attribute data and value using @@ -83,8 +84,8 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) { // First operand points to a global struct. Value *Ptr = CE->getOperand(0); - if (!isa<GlobalValue>(Ptr) || - !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType())) + GlobalValue *GV = dyn_cast<GlobalValue>(Ptr); + if (!GV || !isa<StructType>(GV->getValueType())) return nullptr; // Second operand is zero. @@ -147,61 +148,69 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( // Add location. bool addToAccelTable = false; if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) { - addToAccelTable = true; - DIELoc *Loc = new (DIEValueAllocator) DIELoc; - const MCSymbol *Sym = Asm->getSymbol(Global); - if (Global->isThreadLocal()) { - if (Asm->TM.Options.EmulatedTLS) { - // TODO: add debug info for emulated thread local mode. - } else { - // FIXME: Make this work with -gsplit-dwarf. - unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); - // Based on GCC's support for TLS: - if (!DD->useSplitDwarf()) { - // 1) Start with a constNu of the appropriate pointer size - addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4 - ? dwarf::DW_OP_const4u - : dwarf::DW_OP_const8u); - // 2) containing the (relocated) offset of the TLS variable - // within the module's TLS block. - addExpr(*Loc, dwarf::DW_FORM_udata, - Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + // We cannot describe the location of dllimport'd variables: the computation + // of their address requires loads from the IAT. + if (!Global->hasDLLImportStorageClass()) { + addToAccelTable = true; + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + const MCSymbol *Sym = Asm->getSymbol(Global); + if (Global->isThreadLocal()) { + if (Asm->TM.Options.EmulatedTLS) { + // TODO: add debug info for emulated thread local mode. } else { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(*Loc, dwarf::DW_FORM_udata, - DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4 + ? dwarf::DW_OP_const4u + : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. 
+ addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + } else { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + } + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); } - // 3) followed by an OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, - DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address - : dwarf::DW_OP_form_tls_address); + } else { + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); } - } else { - DD->addArangeLabel(SymbolCU(this, Sym)); - addOpAddress(*Loc, Sym); - } - addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - addLinkageName(*VariableDIE, GV->getLinkageName()); + addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); + if (DD->useAllLinkageNames()) + addLinkageName(*VariableDIE, GV->getLinkageName()); + } } else if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV->getVariable())) { addConstantValue(*VariableDIE, CI, GTy); } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) { - addToAccelTable = true; - // GV is a merged global. - DIELoc *Loc = new (DIEValueAllocator) DIELoc; - Value *Ptr = CE->getOperand(0); - MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr)); - DD->addArangeLabel(SymbolCU(this, Sym)); - addOpAddress(*Loc, Sym); - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); - addUInt(*Loc, dwarf::DW_FORM_udata, - Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); + auto *Ptr = cast<GlobalValue>(CE->getOperand(0)); + if (!Ptr->hasDLLImportStorageClass()) { + addToAccelTable = true; + // GV is a merged global. 
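+      // The location built below is the merged symbol's address plus the
+      // element's byte offset: DW_OP_addr, DW_OP_constu <offset>, DW_OP_plus.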
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc; + MCSymbol *Sym = Asm->getSymbol(Ptr); + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); + addUInt(*Loc, dwarf::DW_FORM_udata, + Asm->getDataLayout().getIndexedOffsetInType(Ptr->getValueType(), + Idx)); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); + } } if (addToAccelTable) { @@ -285,7 +294,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd()); - if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( + if (DD->useAppleExtensionAttributes() && + !DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( *DD->getCurrentFunction())) addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr); @@ -503,9 +513,20 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, addVariableAddress(DV, *VariableDie, Location); } else if (RegOp.getReg()) addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg())); - } else if (DVInsn->getOperand(0).isImm()) - addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); - else if (DVInsn->getOperand(0).isFPImm()) + } else if (DVInsn->getOperand(0).isImm()) { + // This variable is described by a single constant. + // Check whether it has a DIExpression. + auto *Expr = DV.getSingleExpression(); + if (Expr && Expr->getNumElements()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + // If there is an expression, emit raw unsigned bytes. 
+ DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm()); + DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end()); + addBlock(*VariableDie, dwarf::DW_AT_location, Loc); + } else + addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); + } else if (DVInsn->getOperand(0).isFPImm()) addConstantFPValue(*VariableDie, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isCImm()) addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(), @@ -526,7 +547,8 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); assert(Expr != DV.getExpression().end() && "Wrong number of expressions"); - DwarfExpr.AddMachineRegIndirect(FrameReg, Offset); + DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), + FrameReg, Offset); DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end()); ++Expr; } @@ -683,25 +705,6 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { applySubprogramAttributesToDefinition(SP, *D); } } -void DwarfCompileUnit::collectDeadVariables(const DISubprogram *SP) { - assert(SP && "CU's subprogram list contains a non-subprogram"); - assert(SP->isDefinition() && - "CU's subprogram list contains a subprogram declaration"); - auto Variables = SP->getVariables(); - if (Variables.size() == 0) - return; - - DIE *SPDIE = DU->getAbstractSPDies().lookup(SP); - if (!SPDIE) - SPDIE = getDIE(SP); - assert(SPDIE); - for (const DILocalVariable *DV : Variables) { - DbgVariable NewVar(DV, /* IA */ nullptr, DD); - auto VariableDie = constructVariableDIE(NewVar); - applyVariableAttributes(NewVar, *VariableDie); - SPDIE->addChild(std::move(VariableDie)); - } -} void DwarfCompileUnit::emitHeader(bool UseOffsets) { // Don't bother labeling the .dwo unit, as its offset isn't used. @@ -770,16 +773,16 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - assert(DV.getExpression().size() == 1); - const DIExpression *Expr = DV.getExpression().back(); + const DIExpression *Expr = DV.getSingleExpression(); bool ValidReg; + const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); if (Location.getOffset()) { - ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(), + ValidReg = DwarfExpr.AddMachineRegIndirect(TRI, Location.getReg(), Location.getOffset()); if (ValidReg) DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end()); } else - ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg()); + ValidReg = DwarfExpr.AddMachineRegExpression(TRI, Expr, Location.getReg()); // Now attach the location information to the DIE. 
if (ValidReg)
@@ -824,7 +827,7 @@ bool DwarfCompileUnit::isDwoUnit() const {
}
bool DwarfCompileUnit::includeMinimalInlineScopes() const {
- return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly ||
+ return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
}
} // end llvm namespace
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 2e28467..90f74a3 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,12 +15,12 @@
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
#include "DwarfUnit.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Dwarf.h"
namespace llvm {
+class StringRef;
class AsmPrinter;
class DIE;
class DwarfDebug;
@@ -29,6 +29,12 @@ class MCSymbol;
class LexicalScope;
class DwarfCompileUnit : public DwarfUnit {
+  /// A numeric ID unique among all CUs in the module.
+  unsigned UniqueID;
+
+  /// Offset of the UnitDie from beginning of debug info section.
+  unsigned DebugInfoOffset = 0;
+
/// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
/// the need to search for it in applyStmtList.
DIE::value_iterator StmtListValue;
@@ -39,6 +45,9 @@ class DwarfCompileUnit : public DwarfUnit {
/// The start of the unit within its section.
MCSymbol *LabelBegin;
+  /// The start of the unit macro info within macro section.
+  MCSymbol *MacroLabelBegin;
+
typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList;
typedef llvm::DenseMap<const MDNode *, ImportedEntityList>
ImportedEntityMap;
@@ -74,6 +83,10 @@ public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
+  unsigned getUniqueID() const { return UniqueID; }
+  unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
+  void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
+
DwarfCompileUnit *getSkeleton() const {
return Skeleton;
}
@@ -105,7 +118,14 @@ public:
unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
void addImportedEntity(const DIImportedEntity* IE) {
- ImportedEntities[IE->getScope()].push_back(IE);
+    DIScope *Scope = IE->getScope();
+    assert(Scope && "Invalid Scope encoding!");
+    if (!isa<DILocalScope>(Scope))
+      // No need to add imported entities that are not local declarations.
+      return;
+
+    auto *LocalScope = cast<DILocalScope>(Scope)->getNonLexicalBlockFileScope();
+    ImportedEntities[LocalScope].push_back(IE);
}
/// addRange - Add an address range to the list of ranges for this unit.
@@ -167,8 +187,6 @@ public:
void finishSubprogramDefinition(const DISubprogram *SP);
- void collectDeadVariables(const DISubprogram *SP);
-
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) {
Skeleton = &Skel;
}
@@ -189,6 +207,10 @@ public:
return LabelBegin;
}
+  MCSymbol *getMacroLabelBegin() const {
+    return MacroLabelBegin;
+  }
+
/// Add a new global name to the compile unit.
void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) override; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index f56c8e4..7fba768 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -26,7 +26,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Instructions.h" @@ -54,6 +53,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "dwarfdebug" @@ -105,13 +105,21 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, clEnumVal(Disable, "Disabled"), clEnumValEnd), cl::init(Default)); -static cl::opt<DefaultOnOff> -DwarfLinkageNames("dwarf-linkage-names", cl::Hidden, - cl::desc("Emit DWARF linkage-name attributes."), - cl::values(clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), clEnumValEnd), - cl::init(Default)); +enum LinkageNameOption { + DefaultLinkageNames, + AllLinkageNames, + AbstractLinkageNames +}; +static cl::opt<LinkageNameOption> + DwarfLinkageNames("dwarf-linkage-names", cl::Hidden, + cl::desc("Which DWARF linkage-name attributes to emit."), + cl::values(clEnumValN(DefaultLinkageNames, "Default", + "Default for platform"), + clEnumValN(AllLinkageNames, "All", "All"), + clEnumValN(AbstractLinkageNames, "Abstract", + "Abstract subprograms"), + clEnumValEnd), + cl::init(DefaultLinkageNames)); static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; @@ -130,28 +138,21 @@ void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) { BS.EmitULEB128(Value, Twine(Value)); } -bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { +bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, + unsigned MachineReg) { // This information is not available while emitting .debug_loc entries. return false; } //===----------------------------------------------------------------------===// -/// resolve - Look in the DwarfDebug map for the MDNode that -/// corresponds to the reference. -template <typename T> T *DbgVariable::resolve(TypedDINodeRef<T> Ref) const { - return DD->resolve(Ref); -} - bool DbgVariable::isBlockByrefVariable() const { assert(Var && "Invalid complex DbgVariable!"); - return Var->getType() - .resolve(DD->getTypeIdentifierMap()) - ->isBlockByrefStruct(); + return Var->getType().resolve()->isBlockByrefStruct(); } const DIType *DbgVariable::getType() const { - DIType *Ty = Var->getType().resolve(DD->getTypeIdentifierMap()); + DIType *Ty = Var->getType().resolve(); // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. 
if (Ty->isBlockByrefStruct()) { @@ -201,8 +202,8 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()), - PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator), + : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()), + InfoHolder(A, "info_string", DIEValueAllocator), SkeletonHolder(A, "skel_string", DIEValueAllocator), IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, @@ -214,7 +215,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) { CurFn = nullptr; - CurMI = nullptr; Triple TT(Asm->getTargetTriple()); // Make sure we know our "debugger tuning." The target option takes @@ -234,6 +234,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasDwarfAccelTables = DwarfAccelTables == Enable; + HasAppleExtensionAttributes = tuneForLLDB(); + // Handle split DWARF. Off by default for now. if (SplitDwarf == Default) HasSplitDwarf = false; @@ -246,11 +248,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasDwarfPubSections = DwarfPubSections == Enable; - // SCE does not use linkage names. - if (DwarfLinkageNames == Default) - UseLinkageNames = !tuneForSCE(); + // SCE defaults to linkage names only for abstract subprograms. + if (DwarfLinkageNames == DefaultLinkageNames) + UseAllLinkageNames = !tuneForSCE(); else - UseLinkageNames = DwarfLinkageNames == Enable; + UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames; unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber @@ -265,12 +267,10 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // https://sourceware.org/bugzilla/show_bug.cgi?id=11616 UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3; - Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion); + // GDB does not fully support the DWARF 4 representation for bitfields. + UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB(); - { - NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - beginModule(); - } + Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion); } // Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h. @@ -297,7 +297,6 @@ static void getObjCClassCategory(StringRef In, StringRef &Class, Class = In.slice(In.find('[') + 1, In.find('(')); Category = In.slice(In.find('[') + 1, In.find(' ')); - return; } static StringRef getObjCMethodName(StringRef In) { @@ -367,8 +366,8 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. 
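// (Editor's note: the hunk that follows is one piece of retiring SPMap.
// Since DISubprogram now carries a unit: operand pointing back at its
// DICompileUnit, the owning DwarfCompileUnit can be recovered with
// CUMap.lookup(SP->getUnit()) instead of a side table keyed by subprogram;
// SPMap itself is deleted in the DwarfDebug.h hunk later in this diff.)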
- auto &CU = SPMap[SP]; - forBothCUs(*CU, [&](DwarfCompileUnit &CU) { + auto &CU = *CUMap.lookup(cast<DISubprogram>(SP)->getUnit()); + forBothCUs(CU, [&](DwarfCompileUnit &CU) { CU.constructAbstractSubprogramScopeDIE(Scope); }); } @@ -392,8 +391,11 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { DwarfCompileUnit &NewCU = *OwnedUnit; DIE &Die = NewCU.getUnitDie(); InfoHolder.addUnit(std::move(OwnedUnit)); - if (useSplitDwarf()) + if (useSplitDwarf()) { NewCU.setSkeleton(constructSkeletonCU(NewCU)); + NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, + DIUnit->getSplitDebugFilename()); + } // LTO with assembly output shares a single line table amongst multiple CUs. // To avoid the compilation directory being ambiguous, let the line table @@ -419,16 +421,18 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { addGnuPubAttributes(NewCU, Die); } - if (DIUnit->isOptimized()) - NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized); + if (useAppleExtensionAttributes()) { + if (DIUnit->isOptimized()) + NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized); - StringRef Flags = DIUnit->getFlags(); - if (!Flags.empty()) - NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags); + StringRef Flags = DIUnit->getFlags(); + if (!Flags.empty()) + NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags); - if (unsigned RVer = DIUnit->getRuntimeVersion()) - NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, - dwarf::DW_FORM_data1, RVer); + if (unsigned RVer = DIUnit->getRuntimeVersion()) + NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, + dwarf::DW_FORM_data1, RVer); + } if (useSplitDwarf()) NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); @@ -460,48 +464,42 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, // global DIEs and emit initial debug info sections. This is invoked by // the target AsmPrinter. void DwarfDebug::beginModule() { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); if (DisableDebugInfoPrinting) return; const Module *M = MMI->getModule(); - NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); - if (!CU_Nodes) - return; - TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes); - - SingleCU = CU_Nodes->getNumOperands() == 1; + unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(), + M->debug_compile_units_end()); + // Tell MMI whether we have debug info. + MMI->setDebugInfoAvailability(NumDebugCUs > 0); + SingleCU = NumDebugCUs == 1; - for (MDNode *N : CU_Nodes->operands()) { - auto *CUNode = cast<DICompileUnit>(N); + for (DICompileUnit *CUNode : M->debug_compile_units()) { DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); for (auto *IE : CUNode->getImportedEntities()) CU.addImportedEntity(IE); for (auto *GV : CUNode->getGlobalVariables()) CU.getOrCreateGlobalVariableDIE(GV); - for (auto *SP : CUNode->getSubprograms()) - SPMap.insert(std::make_pair(SP, &CU)); for (auto *Ty : CUNode->getEnumTypes()) { // The enum types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef()))); + CU.getOrCreateTypeDIE(cast<DIType>(Ty)); } for (auto *Ty : CUNode->getRetainedTypes()) { // The retained types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. - DIType *RT = cast<DIType>(resolve(Ty->getRef())); - if (!RT->isExternalTypeRef()) - // There is no point in force-emitting a forward declaration. 
- CU.getOrCreateTypeDIE(RT); + if (DIType *RT = dyn_cast<DIType>(Ty)) + if (!RT->isExternalTypeRef()) + // There is no point in force-emitting a forward declaration. + CU.getOrCreateTypeDIE(RT); } // Emit imported_modules last so that the relevant context is already // available. for (auto *IE : CUNode->getImportedEntities()) constructAndAddImportedEntityDIE(CU, IE); } - - // Tell MMI that we have debug info. - MMI->setDebugInfoAvailability(true); } void DwarfDebug::finishVariableDefinitions() { @@ -524,31 +522,13 @@ void DwarfDebug::finishVariableDefinitions() { } void DwarfDebug::finishSubprogramDefinitions() { - for (const auto &P : SPMap) - forBothCUs(*P.second, [&](DwarfCompileUnit &CU) { - CU.finishSubprogramDefinition(cast<DISubprogram>(P.first)); - }); -} - - -// Collect info for variables that were optimized out. -void DwarfDebug::collectDeadVariables() { - const Module *M = MMI->getModule(); - - if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { - for (MDNode *N : CU_Nodes->operands()) { - auto *TheCU = cast<DICompileUnit>(N); - // Construct subprogram DIE and add variables DIEs. - DwarfCompileUnit *SPCU = - static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU)); - assert(SPCU && "Unable to find Compile Unit!"); - for (auto *SP : TheCU->getSubprograms()) { - if (ProcessedSPNodes.count(SP) != 0) - continue; - SPCU->collectDeadVariables(SP); - } - } - } + for (auto &F : MMI->getModule()->functions()) + if (auto *SP = F.getSubprogram()) + if (ProcessedSPNodes.count(SP) && + SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug) + forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) { + CU.finishSubprogramDefinition(SP); + }); } void DwarfDebug::finalizeModuleInfo() { @@ -558,11 +538,6 @@ void DwarfDebug::finalizeModuleInfo() { finishVariableDefinitions(); - // Collect info for variables that were optimized out. - collectDeadVariables(); - - unsigned MacroOffset = 0; - std::unique_ptr<AsmStreamerBase> AS(new SizeReporterAsmStreamer(Asm)); // Handle anything that needs to be done on a per-unit basis after // all other generation. for (const auto &P : CUMap) { @@ -617,13 +592,11 @@ void DwarfDebug::finalizeModuleInfo() { } auto *CUNode = cast<DICompileUnit>(P.first); - if (CUNode->getMacros()) { - // Compile Unit has macros, emit "DW_AT_macro_info" attribute. - U.addUInt(U.getUnitDie(), dwarf::DW_AT_macro_info, - dwarf::DW_FORM_sec_offset, MacroOffset); - // Update macro section offset - MacroOffset += handleMacroNodes(AS.get(), CUNode->getMacros(), U); - } + // If the compile unit has macros, emit the "DW_AT_macro_info" attribute. + if (CUNode->getMacros()) + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info, + U.getMacroLabelBegin(), + TLOF.getDwarfMacinfoSection()->getBeginSymbol()); } // Compute DIE offsets and sizes. @@ -694,7 +667,6 @@ void DwarfDebug::endModule() { } // clean up.
- SPMap.clear(); AbstractVariables.clear(); } @@ -717,7 +689,7 @@ DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) { void DwarfDebug::createAbstractVariable(const DILocalVariable *Var, LexicalScope *Scope) { - auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr, this); + auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr); InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get()); AbstractVariables[Var] = std::move(AbsDbgVariable); } @@ -761,7 +733,7 @@ void DwarfDebug::collectVariableInfoFromMMITable( continue; ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode()); - auto RegVar = make_unique<DbgVariable>(Var.first, Var.second, this); + auto RegVar = make_unique<DbgVariable>(Var.first, Var.second); RegVar->initializeMMI(VI.Expr, VI.Slot); if (InfoHolder.addScopeVariable(Scope, RegVar.get())) ConcreteVariables.push_back(std::move(RegVar)); @@ -793,29 +765,6 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!"); } -// Determine the relative position of the pieces described by P1 and P2. -// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, -// 1 if P1 is entirely after P2. -static int pieceCmp(const DIExpression *P1, const DIExpression *P2) { - unsigned l1 = P1->getBitPieceOffset(); - unsigned l2 = P2->getBitPieceOffset(); - unsigned r1 = l1 + P1->getBitPieceSize(); - unsigned r2 = l2 + P2->getBitPieceSize(); - if (r1 <= l2) - return -1; - else if (r2 <= l1) - return 1; - else - return 0; -} - -/// Determine whether two variable pieces overlap. -static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) { - if (!P1->isBitPiece() || !P2->isBitPiece()) - return true; - return pieceCmp(P1, P2) == 0; -} - /// \brief If this and Next are describing different pieces of the same /// variable, merge them by appending Next's values to the current /// list of values. @@ -832,8 +781,9 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) { // sorted. for (unsigned i = 0, j = 0; i < Values.size(); ++i) { for (; j < Next.Values.size(); ++j) { - int res = pieceCmp(cast<DIExpression>(Values[i].Expression), - cast<DIExpression>(Next.Values[j].Expression)); + int res = DebugHandlerBase::pieceCmp( + cast<DIExpression>(Values[i].Expression), + cast<DIExpression>(Next.Values[j].Expression)); if (res == 0) // The two expressions overlap, we can't merge. return false; // Values[i] is entirely before Next.Values[j], @@ -944,7 +894,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, DEBUG({ dbgs() << CurEntry->getValues().size() << " Values:\n"; for (auto &Value : CurEntry->getValues()) - Value.getExpression()->dump(); + Value.dump(); dbgs() << "-----\n"; }); @@ -957,12 +907,23 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope, InlinedVariable IV) { ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode()); - ConcreteVariables.push_back( - make_unique<DbgVariable>(IV.first, IV.second, this)); + ConcreteVariables.push_back(make_unique<DbgVariable>(IV.first, IV.second)); InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get()); return ConcreteVariables.back().get(); } +// Determine whether this DBG_VALUE is valid at the beginning of the function. +static bool validAtEntry(const MachineInstr *MInsn) { + auto MBB = MInsn->getParent(); + // Is it in the entry basic block? 
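// (Editor's note, ahead of the rest of validAtEntry in the next lines: the
// predicate accepts a DBG_VALUE only if it sits in the entry block and is
// preceded solely by other debug values or frame-setup instructions, i.e.
// no ordinary instruction has yet executed that could clobber the described
// location. This is what lets collectVariableInfo below treat one such
// DBG_VALUE, or a single constant operand, as covering the whole function.)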
+ if (!MBB->pred_empty()) + return false; + for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I) + if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup))) + return false; + return true; +} + // Find variables for each lexical scope. void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, @@ -995,8 +956,11 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); - // Check if the first DBG_VALUE is valid for the rest of the function. - if (Ranges.size() == 1 && Ranges.front().second == nullptr) { + // Check if there is a single DBG_VALUE, valid throughout the function. + // A single constant is also considered valid for the entire function. + if (Ranges.size() == 1 && + (MInsn->getOperand(0).isImm() || + (validAtEntry(MInsn) && Ranges.front().second == nullptr))) { RegVar->initializeDbgValue(MInsn); continue; } @@ -1008,7 +972,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, SmallVector<DebugLocEntry, 8> Entries; buildLocationList(Entries, Ranges); - // If the variable has an DIBasicType, extract it. Basic types cannot have + // If the variable has a DIBasicType, extract it. Basic types cannot have // unique identifiers, so don't bother resolving the type with the // identifier map. const DIBasicType *BT = dyn_cast<DIBasicType>( @@ -1027,25 +991,14 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, } } -// Return Label preceding the instruction. -MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) { - MCSymbol *Label = LabelsBeforeInsn.lookup(MI); - assert(Label && "Didn't insert label before instruction"); - return Label; -} - -// Return Label immediately following the instruction. -MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { - return LabelsAfterInsn.lookup(MI); -} - // Process beginning of an instruction. void DwarfDebug::beginInstruction(const MachineInstr *MI) { - assert(CurMI == nullptr); - CurMI = MI; + DebugHandlerBase::beginInstruction(MI); + assert(CurMI); + // Check if source location changes, but ignore DBG_VALUE locations. if (!MI->isDebugValue()) { - DebugLoc DL = MI->getDebugLoc(); + const DebugLoc &DL = MI->getDebugLoc(); if (DL != PrevInstLoc) { if (DL) { unsigned Flags = 0; @@ -1067,78 +1020,6 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { } } } - - // Insert labels where requested. - DenseMap<const MachineInstr *, MCSymbol *>::iterator I = - LabelsBeforeInsn.find(MI); - - // No label needed. - if (I == LabelsBeforeInsn.end()) - return; - - // Label already assigned. - if (I->second) - return; - - if (!PrevLabel) { - PrevLabel = MMI->getContext().createTempSymbol(); - Asm->OutStreamer->EmitLabel(PrevLabel); - } - I->second = PrevLabel; -} - -// Process end of an instruction. -void DwarfDebug::endInstruction() { - assert(CurMI != nullptr); - // Don't create a new label after DBG_VALUE instructions. - // They don't generate code. - if (!CurMI->isDebugValue()) - PrevLabel = nullptr; - - DenseMap<const MachineInstr *, MCSymbol *>::iterator I = - LabelsAfterInsn.find(CurMI); - CurMI = nullptr; - - // No label needed. - if (I == LabelsAfterInsn.end()) - return; - - // Label already assigned. - if (I->second) - return; - - // We need a label after this instruction. 
- if (!PrevLabel) { - PrevLabel = MMI->getContext().createTempSymbol(); - Asm->OutStreamer->EmitLabel(PrevLabel); - } - I->second = PrevLabel; -} - -// Each LexicalScope has first instruction and last instruction to mark -// beginning and end of a scope respectively. Create an inverse map that list -// scopes starts (and ends) with an instruction. One instruction may start (or -// end) multiple scopes. Ignore scopes that are not reachable. -void DwarfDebug::identifyScopeMarkers() { - SmallVector<LexicalScope *, 4> WorkList; - WorkList.push_back(LScopes.getCurrentFunctionScope()); - while (!WorkList.empty()) { - LexicalScope *S = WorkList.pop_back_val(); - - const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); - if (!Children.empty()) - WorkList.append(Children.begin(), Children.end()); - - if (S->isAbstractScope()) - continue; - - for (const InsnRange &R : S->getRanges()) { - assert(R.first && "InsnRange does not have first instruction!"); - assert(R.second && "InsnRange does not have second instruction!"); - requestLabelBeforeInsn(R.first); - requestLabelAfterInsn(R.second); - } - } } static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { @@ -1167,15 +1048,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Grab the lexical scopes for the function, if we don't have any of those // then we're not going to be able to do anything. - LScopes.initialize(*MF); + DebugHandlerBase::beginFunction(MF); if (LScopes.empty()) return; - assert(DbgValues.empty() && "DbgValues map wasn't cleaned!"); - - // Make sure that each lexical scope will have a begin/end label. - identifyScopeMarkers(); - // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function // belongs to so that we add to the correct per-cu line table in the // non-asm case. @@ -1188,55 +1064,19 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // isn't structurally identical (see: file path/name info from clang, which // includes the directory of the cpp file being built, even when the file name // is absolute (such as an <> lookup header))) - DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); - assert(TheCU && "Unable to find compile unit!"); + auto *SP = cast<DISubprogram>(FnScope->getScopeNode()); + DwarfCompileUnit *TheCU = CUMap.lookup(SP->getUnit()); + if (!TheCU) { + assert(SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug && + "DICompileUnit missing from llvm.dbg.cu?"); + return; + } if (Asm->OutStreamer->hasRawTextSupport()) // Use a single line table if we are generating assembly. Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); else Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); - // Calculate history for local variables. - calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), - DbgValues); - - // Request labels for the full history. - for (const auto &I : DbgValues) { - const auto &Ranges = I.second; - if (Ranges.empty()) - continue; - - // The first mention of a function argument gets the CurrentFnBegin - // label, so arguments are visible when breaking at function entry. - const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable(); - if (DIVar->isParameter() && - getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { - LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); - if (Ranges.front().first->getDebugExpression()->isBitPiece()) { - // Mark all non-overlapping initial pieces. 
- for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { - const DIExpression *Piece = I->first->getDebugExpression(); - if (std::all_of(Ranges.begin(), I, - [&](DbgValueHistoryMap::InstrRange Pred) { - return !piecesOverlap(Piece, Pred.first->getDebugExpression()); - })) - LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); - else - break; - } - } - } - - for (const auto &Range : Ranges) { - requestLabelBeforeInsn(Range.first); - if (Range.second) - requestLabelAfterInsn(Range.second); - } - } - - PrevInstLoc = DebugLoc(); - PrevLabel = Asm->getFunctionBegin(); - // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); if (DILocation *L = PrologEndLoc) { @@ -1252,13 +1092,19 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { assert(CurFn == MF && "endFunction should be called with the same function as beginFunction"); - if (!MMI->hasDebugInfo() || LScopes.empty() || - !MF->getFunction()->getSubprogram()) { + const DISubprogram *SP = MF->getFunction()->getSubprogram(); + if (!MMI->hasDebugInfo() || LScopes.empty() || !SP || + SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) { // If we don't have a lexical scope for this function then there will // be a hole in the range information. Keep note of this by setting the // previously used section to nullptr. PrevCU = nullptr; CurFn = nullptr; + DebugHandlerBase::endFunction(MF); + // Mark functions with no debug info on any instructions, but a + // valid DISubprogram as processed. + if (SP) + ProcessedSPNodes.insert(SP); return; } @@ -1266,8 +1112,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - auto *SP = cast<DISubprogram>(FnScope->getScopeNode()); - DwarfCompileUnit &TheCU = *SPMap.lookup(SP); + SP = cast<DISubprogram>(FnScope->getScopeNode()); + DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit()); DenseSet<InlinedVariable> ProcessedVars; collectVariableInfo(TheCU, SP, ProcessedVars); @@ -1277,17 +1123,16 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Under -gmlt, skip building the subprogram if there are no inlined // subroutines inside it. - if (TheCU.getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly && + if (TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly && LScopes.getAbstractScopesList().empty() && !IsDarwin) { assert(InfoHolder.getScopeVariables().empty()); assert(DbgValues.empty()); // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed // by a -gmlt CU. Add a test and remove this assertion. assert(AbstractVariables.empty()); - LabelsBeforeInsn.clear(); - LabelsAfterInsn.clear(); PrevLabel = nullptr; CurFn = nullptr; + DebugHandlerBase::endFunction(MF); return; } @@ -1319,11 +1164,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // DbgVariables except those that are also in AbstractVariables (since they // can be used cross-function) InfoHolder.getScopeVariables().clear(); - DbgValues.clear(); - LabelsBeforeInsn.clear(); - LabelsAfterInsn.clear(); PrevLabel = nullptr; CurFn = nullptr; + DebugHandlerBase::endFunction(MF); } // Register a source line with debug info. Returns the unique label that was @@ -1535,7 +1378,7 @@ void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { &DwarfCompileUnit::getGlobalTypes); } -// Emit visible names into a debug str section. +/// Emit null-terminated strings into a debug str section. 
void DwarfDebug::emitDebugStr() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); @@ -1554,8 +1397,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, ByteStreamer &Streamer, const DebugLocEntry::Value &Value, unsigned PieceOffsetInBits) { - DebugLocDwarfExpression DwarfExpr(*AP.MF->getSubtarget().getRegisterInfo(), - AP.getDwarfDebug()->getDwarfVersion(), + DebugLocDwarfExpression DwarfExpr(AP.getDwarfDebug()->getDwarfVersion(), Streamer); // Regular entry. if (Value.isInt()) { @@ -1572,18 +1414,19 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, AP.EmitDwarfRegOp(Streamer, Loc); else { // Complex address entry. + const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); if (Loc.getOffset()) { - DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset()); + DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset()); DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(), PieceOffsetInBits); } else - DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(), + DwarfExpr.AddMachineRegExpression(TRI, Expr, Loc.getReg(), PieceOffsetInBits); } + } else if (Value.isConstantFP()) { + APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt(); + DwarfExpr.AddUnsignedConstant(RawBytes); } - // else ... ignore constant fp. There is not any good way to - // to represent them here in dwarf. - // FIXME: ^ } void DebugLocEntry::finalize(const AsmPrinter &AP, @@ -1608,8 +1451,7 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); if (Offset < PieceOffset) { // The DWARF spec seriously mandates pieces with no locations for gaps. - DebugLocDwarfExpression Expr(*AP.MF->getSubtarget().getRegisterInfo(), - AP.getDwarfDebug()->getDwarfVersion(), + DebugLocDwarfExpression Expr(AP.getDwarfDebug()->getDwarfVersion(), Streamer); Expr.AddOpPiece(PieceOffset-Offset, 0); Offset += PieceOffset-Offset; @@ -1708,24 +1550,12 @@ void DwarfDebug::emitDebugARanges() { } } - // Add terminating symbols for each section. - for (const auto &I : SectionMap) { - MCSection *Section = I.first; - MCSymbol *Sym = nullptr; - - if (Section) - Sym = Asm->OutStreamer->endSection(Section); - - // Insert a final terminator. - SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); - } - DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans; for (auto &I : SectionMap) { - const MCSection *Section = I.first; + MCSection *Section = I.first; SmallVector<SymbolCU, 8> &List = I.second; - if (List.size() < 2) + if (List.size() < 1) continue; // If we have no section (e.g. common), just write out @@ -1735,26 +1565,29 @@ void DwarfDebug::emitDebugARanges() { ArangeSpan Span; Span.Start = Cur.Sym; Span.End = nullptr; - if (Cur.CU) - Spans[Cur.CU].push_back(Span); + assert(Cur.CU); + Spans[Cur.CU].push_back(Span); } continue; } // Sort the symbols by offset within the section. - std::sort(List.begin(), List.end(), - [&](const SymbolCU &A, const SymbolCU &B) { - unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; - unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; - - // Symbols with no order assigned should be placed at the end. - // (e.g. section end labels) - if (IA == 0) - return false; - if (IB == 0) - return true; - return IA < IB; - }); + std::sort( + List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) { + unsigned IA = A.Sym ? 
Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; + + // Symbols with no order assigned should be placed at the end. + // (e.g. section end labels) + if (IA == 0) + return false; + if (IB == 0) + return true; + return IA < IB; + }); + + // Insert a final terminator. + List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section))); // Build spans between each label. const MCSymbol *StartSym = List[0].Sym; @@ -1767,6 +1600,7 @@ void DwarfDebug::emitDebugARanges() { ArangeSpan Span; Span.Start = StartSym; Span.End = Cur.Sym; + assert(Prev.CU); Spans[Prev.CU].push_back(Span); StartSym = Cur.Sym; } @@ -1787,9 +1621,10 @@ void DwarfDebug::emitDebugARanges() { } // Sort the CU list (again, to ensure consistent output order). - std::sort(CUs.begin(), CUs.end(), [](const DwarfUnit *A, const DwarfUnit *B) { - return A->getUniqueID() < B->getUniqueID(); - }); + std::sort(CUs.begin(), CUs.end(), + [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) { + return A->getUniqueID() < B->getUniqueID(); + }); // Emit an arange table for each CU we used. for (DwarfCompileUnit *CU : CUs) { @@ -1827,7 +1662,7 @@ void DwarfDebug::emitDebugARanges() { Asm->OutStreamer->AddComment("Segment Size (in bytes)"); Asm->EmitInt8(0); - Asm->OutStreamer->EmitFill(Padding, 0xff); + Asm->OutStreamer->emitFill(Padding, 0xff); for (const ArangeSpan &Span : List) { Asm->EmitLabelReference(Span.Start, PtrSize); @@ -1852,7 +1687,7 @@ void DwarfDebug::emitDebugARanges() { } } -// Emit visible names into a debug ranges section. +/// Emit address ranges into a debug ranges section. void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. Asm->OutStreamer->SwitchSection( @@ -1894,65 +1729,56 @@ void DwarfDebug::emitDebugRanges() { } } -unsigned DwarfDebug::handleMacroNodes(AsmStreamerBase *AS, - DIMacroNodeArray Nodes, - DwarfCompileUnit &U) { - unsigned Size = 0; +void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { for (auto *MN : Nodes) { if (auto *M = dyn_cast<DIMacro>(MN)) - Size += emitMacro(AS, *M); + emitMacro(*M); else if (auto *F = dyn_cast<DIMacroFile>(MN)) - Size += emitMacroFile(AS, *F, U); + emitMacroFile(*F, U); else llvm_unreachable("Unexpected DI type!"); } - return Size; } -unsigned DwarfDebug::emitMacro(AsmStreamerBase *AS, DIMacro &M) { - int Size = 0; - Size += AS->emitULEB128(M.getMacinfoType()); - Size += AS->emitULEB128(M.getLine()); +void DwarfDebug::emitMacro(DIMacro &M) { + Asm->EmitULEB128(M.getMacinfoType()); + Asm->EmitULEB128(M.getLine()); StringRef Name = M.getName(); StringRef Value = M.getValue(); - Size += AS->emitBytes(Name); + Asm->OutStreamer->EmitBytes(Name); if (!Value.empty()) { // There should be one space between macro name and macro value. 
- Size += AS->emitInt8(' '); - Size += AS->emitBytes(Value); + Asm->EmitInt8(' '); + Asm->OutStreamer->EmitBytes(Value); } - Size += AS->emitInt8('\0'); - return Size; + Asm->EmitInt8('\0'); } -unsigned DwarfDebug::emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F, - DwarfCompileUnit &U) { - int Size = 0; +void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file); - Size += AS->emitULEB128(dwarf::DW_MACINFO_start_file); - Size += AS->emitULEB128(F.getLine()); + Asm->EmitULEB128(dwarf::DW_MACINFO_start_file); + Asm->EmitULEB128(F.getLine()); DIFile *File = F.getFile(); unsigned FID = U.getOrCreateSourceID(File->getFilename(), File->getDirectory()); - Size += AS->emitULEB128(FID); - Size += handleMacroNodes(AS, F.getElements(), U); - Size += AS->emitULEB128(dwarf::DW_MACINFO_end_file); - return Size; + Asm->EmitULEB128(FID); + handleMacroNodes(F.getElements(), U); + Asm->EmitULEB128(dwarf::DW_MACINFO_end_file); } -// Emit visible names into a debug macinfo section. +/// Emit macros into a debug macinfo section. void DwarfDebug::emitDebugMacinfo() { - if (MCSection *Macinfo = Asm->getObjFileLowering().getDwarfMacinfoSection()) { - // Start the dwarf macinfo section. - Asm->OutStreamer->SwitchSection(Macinfo); - } - std::unique_ptr<AsmStreamerBase> AS(new EmittingAsmStreamer(Asm)); + // Start the dwarf macinfo section. + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfMacinfoSection()); + for (const auto &P : CUMap) { auto &TheCU = *P.second; auto *SkCU = TheCU.getSkeleton(); DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; auto *CUNode = cast<DICompileUnit>(P.first); - handleMacroNodes(AS.get(), CUNode->getMacros(), U); + Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); + handleMacroNodes(CUNode->getMacros(), U); } Asm->OutStreamer->AddComment("End Of Macro List Mark"); Asm->EmitInt8(0); @@ -1961,7 +1787,7 @@ void DwarfDebug::emitDebugMacinfo() { // DWARF5 Experimental Separate Dwarf emitters. 
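// (Editor's illustration for the macinfo emitters above, before the
// split-DWARF code that follows; this is a sketch, not part of the patch.
// Assuming a source file with '#define FOO 1' on line 3, emitMacro writes
// into .debug_macinfo:
//   .byte 0x01      ; ULEB128 DW_MACINFO_define
//   .byte 0x03      ; ULEB128 line number
//   .ascii "FOO"    ; macro name
//   .byte 0x20      ; the single space between name and value
//   .ascii "1"      ; macro value
//   .byte 0x00      ; NUL terminating the entry
// emitDebugMacinfo then closes each CU's list with the 0 byte labeled
// "End Of Macro List Mark". Because DW_AT_macro_info now references a
// section label, no byte counting through a sizing streamer is needed.)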
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, - std::unique_ptr<DwarfUnit> NewU) { + std::unique_ptr<DwarfCompileUnit> NewU) { NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, U.getCUNode()->getSplitDebugFilename()); @@ -2050,21 +1876,19 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed()) return; - const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy]; - if (TU) { - CU.addDIETypeSignature(RefDie, *TU); + auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0)); + if (!Ins.second) { + CU.addDIETypeSignature(RefDie, Ins.first->second); return; } bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); - auto OwnedUnit = make_unique<DwarfTypeUnit>( - InfoHolder.getUnits().size() + TypeUnitsUnderConstruction.size(), CU, Asm, - this, &InfoHolder, getDwoLineTable(CU)); + auto OwnedUnit = make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder, + getDwoLineTable(CU)); DwarfTypeUnit &NewTU = *OwnedUnit; DIE &UnitDie = NewTU.getUnitDie(); - TU = &NewTU; TypeUnitsUnderConstruction.push_back( std::make_pair(std::move(OwnedUnit), CTy)); @@ -2073,6 +1897,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, uint64_t Signature = makeTypeSignature(Identifier); NewTU.setTypeSignature(Signature); + Ins.first->second = Signature; if (useSplitDwarf()) NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection()); @@ -2096,7 +1921,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // This is pessimistic as some of these types might not be dependent on // the type that used an address. for (const auto &TU : TypeUnitsToAdd) - DwarfTypeUnits.erase(TU.second); + TypeSignatures.erase(TU.second); // Construct this type in the CU directly. // This is inefficient because all the dependent types will be rebuilt @@ -2108,10 +1933,12 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // If the type wasn't dependent on fission addresses, finish adding the type // and all its dependent types. - for (auto &TU : TypeUnitsToAdd) - InfoHolder.addUnit(std::move(TU.first)); + for (auto &TU : TypeUnitsToAdd) { + InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get()); + InfoHolder.emitUnit(TU.first.get(), useSplitDwarf()); + } } - CU.addDIETypeSignature(RefDie, NewTU); + CU.addDIETypeSignature(RefDie, Signature); } // Accelerator table mutators - add each name along with its companion diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 460c186..6b06757 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -14,14 +14,13 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H -#include "AsmPrinterHandler.h" #include "DbgValueHistoryCalculator.h" +#include "DebugHandlerBase.h" #include "DebugLocStream.h" #include "DwarfAccelTable.h" #include "DwarfFile.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" @@ -69,15 +68,14 @@ class DbgVariable { unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs. const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction. SmallVector<int, 1> FrameIndex; /// Frame index. - DwarfDebug *DD; public: /// Construct a DbgVariable. /// /// Creates a variable without any DW_AT_location. 
Call \a initializeMMI() /// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions. - DbgVariable(const DILocalVariable *V, const DILocation *IA, DwarfDebug *DD) - : Var(V), IA(IA), DD(DD) {} + DbgVariable(const DILocalVariable *V, const DILocation *IA) + : Var(V), IA(IA) {} /// Initialize from the MMI table. void initializeMMI(const DIExpression *E, int FI) { @@ -111,6 +109,10 @@ public: const DILocalVariable *getVariable() const { return Var; } const DILocation *getInlinedAt() const { return IA; } ArrayRef<const DIExpression *> getExpression() const { return Expr; } + const DIExpression *getSingleExpression() const { + assert(MInsn && Expr.size() <= 1); + return Expr.size() ? Expr[0] : nullptr; + } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } @@ -174,9 +176,9 @@ public: const DIType *getType() const; private: - /// Look in the DwarfDebug map for the MDNode that - /// corresponds to the reference. - template <typename T> T *resolve(TypedDINodeRef<T> Ref) const; + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { + return Ref.resolve(); + } }; @@ -188,22 +190,13 @@ struct SymbolCU { }; /// Collects and handles dwarf debug information. -class DwarfDebug : public AsmPrinterHandler { - /// Target of Dwarf emission. - AsmPrinter *Asm; - - /// Collected machine module information. - MachineModuleInfo *MMI; - +class DwarfDebug : public DebugHandlerBase { /// All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; /// Maps MDNode with its corresponding DwarfCompileUnit. MapVector<const MDNode *, DwarfCompileUnit *> CUMap; - /// Maps subprogram MDNode with its corresponding DwarfCompileUnit. - MapVector<const MDNode *, DwarfCompileUnit *> SPMap; - /// Maps a CU DIE with its corresponding DwarfCompileUnit. DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap; @@ -213,8 +206,6 @@ class DwarfDebug : public AsmPrinterHandler { /// Size of each symbol emitted (for those symbols that have a specific size). DenseMap<const MCSymbol *, uint64_t> SymSize; - LexicalScopes LScopes; - /// Collection of abstract variables. DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables; @@ -227,32 +218,9 @@ class DwarfDebug : public AsmPrinterHandler { /// create DIEs. SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; - /// Maps instruction with label emitted before instruction. - DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn; - - /// Maps instruction with label emitted after instruction. - DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; - - /// History of DBG_VALUE and clobber instructions for each user - /// variable. Variables are listed in order of appearance. - DbgValueHistoryMap DbgValues; - - /// Previous instruction's location information. This is used to - /// determine label location to indicate scope boundries in dwarf - /// debug info. - DebugLoc PrevInstLoc; - MCSymbol *PrevLabel; - - /// This location indicates end of function prologue and beginning of - /// function body. - DebugLoc PrologEndLoc; - /// If nonnull, stores the current machine function we're processing. const MachineFunction *CurFn; - /// If nonnull, stores the current machine instruction we're processing. - const MachineInstr *CurMI; - /// If nonnull, stores the CU in which the previous subprogram was contained. 
const DwarfCompileUnit *PrevCU; @@ -266,9 +234,9 @@ class DwarfDebug : public AsmPrinterHandler { /// Holders for the various debug information flags that we might need to /// have exposed. See accessor functions below for description. - /// Map from MDNodes for user-defined types to the type units that - /// describe them. - DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits; + /// Map from MDNodes for user-defined types to their type signatures. Also + /// used to keep track of which types we have emitted type units for. + DenseMap<const MDNode *, uint64_t> TypeSignatures; SmallVector< std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1> @@ -280,18 +248,19 @@ class DwarfDebug : public AsmPrinterHandler { /// Whether to use the GNU TLS opcode (instead of the standard opcode). bool UseGNUTLSOpcode; - /// Whether to emit DW_AT_[MIPS_]linkage_name. - bool UseLinkageNames; + /// Whether to use DWARF 2 bitfields (instead of the DWARF 4 format). + bool UseDWARF2Bitfields; + + /// Whether to emit all linkage names, or just abstract subprograms. + bool UseAllLinkageNames; /// Version of dwarf we're emitting. unsigned DwarfVersion; - /// Maps from a type identifier to the actual MDNode. - DITypeIdentifierMap TypeIdentifierMap; - /// DWARF5 Experimental Options /// @{ bool HasDwarfAccelTables; + bool HasAppleExtensionAttributes; bool HasSplitDwarf; /// Separated Dwarf Variables @@ -324,9 +293,19 @@ class DwarfDebug : public AsmPrinterHandler { // Identify a debugger for "tuning" the debug info. DebuggerKind DebuggerTuning; + /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger. + /// + /// Returns whether we are "tuning" for a given debugger. + /// Should be used only within the constructor, to set feature flags. + /// @{ + bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; } + bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; } + bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } + /// @} + MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); - const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() { + const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() { return InfoHolder.getUnits(); } @@ -347,9 +326,6 @@ class DwarfDebug : public AsmPrinterHandler { /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); - /// Collect info for variables that were optimized out. - void collectDeadVariables(); - void finishVariableDefinitions(); void finishSubprogramDefinitions(); @@ -397,7 +373,7 @@ class DwarfDebug : public AsmPrinterHandler { bool GnuStyle, MCSection *PSec, StringRef Name, const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const); - /// Emit visible names into a debug str section. + /// Emit null-terminated strings into a debug str section. void emitDebugStr(); /// Emit variable locations into a debug loc section. @@ -414,17 +390,15 @@ class DwarfDebug : public AsmPrinterHandler { /// Emit macros into a debug macinfo section. 
void emitDebugMacinfo(); - unsigned emitMacro(AsmStreamerBase *AS, DIMacro &M); - unsigned emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F, - DwarfCompileUnit &U); - unsigned handleMacroNodes(AsmStreamerBase *AS, DIMacroNodeArray Nodes, - DwarfCompileUnit &U); + void emitMacro(DIMacro &M); + void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U); + void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U); /// DWARF 5 Experimental Split Dwarf Emitters /// Initialize common features of skeleton units. void initSkeletonUnit(const DwarfUnit &U, DIE &Die, - std::unique_ptr<DwarfUnit> NewU); + std::unique_ptr<DwarfCompileUnit> NewU); /// Construct the split debug info compile unit for the debug info /// section. @@ -460,10 +434,6 @@ class DwarfDebug : public AsmPrinterHandler { void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope, unsigned Flags); - /// Identify instructions that are marking the beginning of or - /// ending of a scope. - void identifyScopeMarkers(); - /// Populate LexicalScope entries with variables' info. void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, DenseSet<InlinedVariable> &ProcessedVars); @@ -477,16 +447,6 @@ class DwarfDebug : public AsmPrinterHandler { /// by MMI. void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P); - /// Ensure that a label will be emitted before MI. - void requestLabelBeforeInsn(const MachineInstr *MI) { - LabelsBeforeInsn.insert(std::make_pair(MI, nullptr)); - } - - /// Ensure that a label will be emitted after MI. - void requestLabelAfterInsn(const MachineInstr *MI) { - LabelsAfterInsn.insert(std::make_pair(MI, nullptr)); - } - public: //===--------------------------------------------------------------------===// // Main entry points. @@ -511,9 +471,6 @@ public: /// Process beginning of an instruction. void beginInstruction(const MachineInstr *MI) override; - /// Process end of an instruction. - void endInstruction() override; - /// Perform an MD5 checksum of \p Identifier and return the lower 64 bits. static uint64_t makeTypeSignature(StringRef Identifier); @@ -531,21 +488,17 @@ public: SymSize[Sym] = Size; } - /// Returns whether to emit DW_AT_[MIPS_]linkage_name. - bool useLinkageNames() const { return UseLinkageNames; } + /// Returns whether we should emit all DW_AT_[MIPS_]linkage_name. + /// If not, we still might emit certain cases. + bool useAllLinkageNames() const { return UseAllLinkageNames; } /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the /// standard DW_OP_form_tls_address opcode bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } - /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger. - /// - /// Returns whether we are "tuning" for a given debugger. - /// @{ - bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; } - bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; } - bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } - /// @} + /// Returns whether to use the DWARF2 format for bitfields instead of the + /// DWARF4 format. + bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; } // Experimental DWARF5 features. @@ -553,6 +506,10 @@ public: /// use to accelerate lookup. bool useDwarfAccelTables() const { return HasDwarfAccelTables; } + bool useAppleExtensionAttributes() const { + return HasAppleExtensionAttributes; + } + /// Returns whether or not to change the current debug info for the /// split dwarf proposal support.
bool useSplitDwarf() const { return HasSplitDwarf; } @@ -577,12 +534,7 @@ public: /// Find the MDNode for the given reference. template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { - return Ref.resolve(TypeIdentifierMap); - } - - /// Return the TypeIdentifierMap. - const DITypeIdentifierMap &getTypeIdentifierMap() const { - return TypeIdentifierMap; + return Ref.resolve(); } /// Find the DwarfCompileUnit for the given CU Die. @@ -608,12 +560,6 @@ public: /// going to be null. bool isLexicalScopeDIENull(LexicalScope *Scope); - /// Return Label preceding the instruction. - MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); - - /// Return Label immediately following the instruction. - MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index f4667b4..8287f28 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -16,6 +16,7 @@ #include "EHStreamer.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCDwarf.h" namespace llvm { class MachineFunction; @@ -29,12 +30,16 @@ protected: bool shouldEmitCFI; void markFunctionEnd() override; + void endFragment() override; }; class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase { /// Per-function flag to indicate if .cfi_personality should be emitted. bool shouldEmitPersonality; + /// Per-function flag to indicate if .cfi_personality must be emitted. + bool forceEmitPersonality; + /// Per-function flag to indicate if .cfi_lsda should be emitted. bool shouldEmitLSDA; @@ -59,6 +64,9 @@ public: /// Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; + + void beginFragment(const MachineBasicBlock *MBB, + ExceptionSymbolProvider ESP) override; }; class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 7b5b831..7dbc6cb 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -65,8 +65,9 @@ void DwarfExpression::AddShr(unsigned ShiftBy) { EmitOp(dwarf::DW_OP_shr); } -bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) { - if (isFrameRegister(MachineReg)) { +bool DwarfExpression::AddMachineRegIndirect(const TargetRegisterInfo &TRI, + unsigned MachineReg, int Offset) { + if (isFrameRegister(TRI, MachineReg)) { // If variable offset is based in frame register then use fbreg. EmitOp(dwarf::DW_OP_fbreg); EmitSigned(Offset); @@ -81,7 +82,8 @@ bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) { return true; } -bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, +bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI, + unsigned MachineReg, unsigned PieceSizeInBits, unsigned PieceOffsetInBits) { if (!TRI.isPhysicalRegister(MachineReg)) @@ -159,29 +161,37 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, return CurPos > PieceOffsetInBits; } -void DwarfExpression::AddSignedConstant(int Value) { - EmitOp(dwarf::DW_OP_consts); - EmitSigned(Value); - // The proper way to describe a constant value is - // DW_OP_constu <const>, DW_OP_stack_value. 
- // Unfortunately, DW_OP_stack_value was not available until DWARF-4, - // so we will continue to generate DW_OP_constu <const> for DWARF-2 - // and DWARF-3. Technically, this is incorrect since DW_OP_const <const> - // actually describes a value at a constant addess, not a constant value. - // However, in the past there was no better way to describe a constant - // value, so the producers and consumers started to rely on heuristics - // to disambiguate the value vs. location status of the expression. - // See PR21176 for more details. +void DwarfExpression::AddStackValue() { if (DwarfVersion >= 4) EmitOp(dwarf::DW_OP_stack_value); } -void DwarfExpression::AddUnsignedConstant(unsigned Value) { +void DwarfExpression::AddSignedConstant(int64_t Value) { + EmitOp(dwarf::DW_OP_consts); + EmitSigned(Value); + AddStackValue(); +} + +void DwarfExpression::AddUnsignedConstant(uint64_t Value) { EmitOp(dwarf::DW_OP_constu); EmitUnsigned(Value); - // cf. comment in DwarfExpression::AddSignedConstant(). - if (DwarfVersion >= 4) - EmitOp(dwarf::DW_OP_stack_value); + AddStackValue(); +} + +void DwarfExpression::AddUnsignedConstant(const APInt &Value) { + unsigned Size = Value.getBitWidth(); + const uint64_t *Data = Value.getRawData(); + + // Chop it up into 64-bit pieces, because that's the maximum that + // AddUnsignedConstant takes. + unsigned Offset = 0; + while (Offset < Size) { + AddUnsignedConstant(*Data++); + if (Offset == 0 && Size <= 64) + break; + AddOpPiece(std::min(Size-Offset, 64u), Offset); + Offset += 64; + } } static unsigned getOffsetOrZero(unsigned OffsetInBits, @@ -192,13 +202,14 @@ static unsigned getOffsetOrZero(unsigned OffsetInBits, return OffsetInBits; } -bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr, +bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI, + const DIExpression *Expr, unsigned MachineReg, unsigned PieceOffsetInBits) { auto I = Expr->expr_op_begin(); auto E = Expr->expr_op_end(); if (I == E) - return AddMachineRegPiece(MachineReg); + return AddMachineRegPiece(TRI, MachineReg); // Pattern-match combinations for which more efficient representations exist // first. @@ -208,7 +219,7 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr, unsigned OffsetInBits = I->getArg(0); unsigned SizeInBits = I->getArg(1); // Piece always comes at the end of the expression. - return AddMachineRegPiece(MachineReg, SizeInBits, + return AddMachineRegPiece(TRI, MachineReg, SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); } case dwarf::DW_OP_plus: @@ -219,15 +230,15 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr, if (N != E && N->getOp() == dwarf::DW_OP_deref) { unsigned Offset = I->getArg(0); ValidReg = AddMachineRegIndirect( - MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset); + TRI, MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset); std::advance(I, 2); break; } else - ValidReg = AddMachineRegPiece(MachineReg); + ValidReg = AddMachineRegPiece(TRI, MachineReg); } case dwarf::DW_OP_deref: { // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. 
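// (Editor's aside on the new AddUnsignedConstant(const APInt &) overload
// above, before the deref case resumes below: constants wider than 64 bits
// are emitted 64 bits at a time, each chunk as DW_OP_constu <chunk>,
// DW_OP_stack_value, followed by a piece operation covering its bit range;
// a 128-bit value thus becomes two constu/piece pairs at bit offsets 0 and
// 64, while a value of 64 bits or fewer takes the early break and needs no
// piece. This is what lets emitDebugLocValue describe constant-FP values
// instead of dropping them, as the deleted FIXME earlier used to do.)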
- ValidReg = AddMachineRegIndirect(MachineReg); + ValidReg = AddMachineRegIndirect(TRI, MachineReg); ++I; break; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 78ec937..5fff28d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -31,13 +31,10 @@ class DIELoc; class DwarfExpression { protected: // Various convenience accessors that extract things out of AsmPrinter. - const TargetRegisterInfo &TRI; unsigned DwarfVersion; public: - DwarfExpression(const TargetRegisterInfo &TRI, - unsigned DwarfVersion) - : TRI(TRI), DwarfVersion(DwarfVersion) {} + DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {} virtual ~DwarfExpression() {} /// Output a dwarf operand and an optional assembler comment. @@ -48,7 +45,7 @@ public: virtual void EmitUnsigned(uint64_t Value) = 0; /// Return whether the given machine register is the frame register in the /// current function. - virtual bool isFrameRegister(unsigned MachineReg) = 0; + virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0; /// Emit a dwarf register operation. void AddReg(int DwarfReg, const char *Comment = nullptr); @@ -61,10 +58,24 @@ public: void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0); /// Emit a shift-right dwarf expression. void AddShr(unsigned ShiftBy); + /// Emit a DW_OP_stack_value, if supported. + /// + /// The proper way to describe a constant value is + /// DW_OP_constu <const>, DW_OP_stack_value. + /// Unfortunately, DW_OP_stack_value was not available until DWARF-4, + /// so we will continue to generate DW_OP_constu <const> for DWARF-2 + /// and DWARF-3. Technically, this is incorrect since DW_OP_const <const> + /// actually describes a value at a constant addess, not a constant value. + /// However, in the past there was no better way to describe a constant + /// value, so the producers and consumers started to rely on heuristics + /// to disambiguate the value vs. location status of the expression. + /// See PR21176 for more details. + void AddStackValue(); /// Emit an indirect dwarf register operation for the given machine register. /// \return false if no DWARF register exists for MachineReg. - bool AddMachineRegIndirect(unsigned MachineReg, int Offset = 0); + bool AddMachineRegIndirect(const TargetRegisterInfo &TRI, unsigned MachineReg, + int Offset = 0); /// \brief Emit a partial DWARF register operation. /// \param MachineReg the register @@ -80,20 +91,24 @@ public: /// subregisters that alias the register. /// /// \return false if no DWARF register exists for MachineReg. - bool AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits = 0, + bool AddMachineRegPiece(const TargetRegisterInfo &TRI, unsigned MachineReg, + unsigned PieceSizeInBits = 0, unsigned PieceOffsetInBits = 0); /// Emit a signed constant. - void AddSignedConstant(int Value); + void AddSignedConstant(int64_t Value); + /// Emit an unsigned constant. + void AddUnsignedConstant(uint64_t Value); /// Emit an unsigned constant. - void AddUnsignedConstant(unsigned Value); + void AddUnsignedConstant(const APInt &Value); /// \brief Emit an entire expression on top of a machine register location. /// /// \param PieceOffsetInBits If this is one piece out of a fragmented /// location, this is the offset of the piece inside the entire variable. /// \return false if no DWARF register exists for MachineReg. 
- bool AddMachineRegExpression(const DIExpression *Expr, unsigned MachineReg, + bool AddMachineRegExpression(const TargetRegisterInfo &TRI, + const DIExpression *Expr, unsigned MachineReg, unsigned PieceOffsetInBits = 0); /// Emit the operations remaining in the DIExpressionIterator I. /// \param PieceOffsetInBits If this is one piece out of a fragmented /// location, this is the offset of the piece inside the entire variable. @@ -108,14 +123,14 @@ class DebugLocDwarfExpression : public DwarfExpression { ByteStreamer &BS; public: - DebugLocDwarfExpression(const TargetRegisterInfo &TRI, - unsigned DwarfVersion, ByteStreamer &BS) - : DwarfExpression(TRI, DwarfVersion), BS(BS) {} + DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS) + : DwarfExpression(DwarfVersion), BS(BS) {} void EmitOp(uint8_t Op, const char *Comment = nullptr) override; void EmitSigned(int64_t Value) override; void EmitUnsigned(uint64_t Value) override; - bool isFrameRegister(unsigned MachineReg) override; + bool isFrameRegister(const TargetRegisterInfo &TRI, + unsigned MachineReg) override; }; /// DwarfExpression implementation for singular DW_AT_location. @@ -129,7 +144,8 @@ public: void EmitOp(uint8_t Op, const char *Comment = nullptr) override; void EmitSigned(int64_t Value) override; void EmitUnsigned(uint64_t Value) override; - bool isFrameRegister(unsigned MachineReg) override; + bool isFrameRegister(const TargetRegisterInfo &TRI, + unsigned MachineReg) override; }; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 51b27b4..e9fe98a 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "DwarfFile.h" +#include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "DwarfUnit.h" #include "llvm/ADT/STLExtras.h" @@ -50,22 +51,25 @@ DIEAbbrev &DwarfFile::assignAbbrevNumber(DIE &Die) { return *New; } -void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) { +void DwarfFile::addUnit(std::unique_ptr<DwarfCompileUnit> U) { CUs.push_back(std::move(U)); } // Emit the various dwarf units to the unit section USection with // the abbreviations going into ASection. void DwarfFile::emitUnits(bool UseOffsets) { - for (const auto &TheU : CUs) { - DIE &Die = TheU->getUnitDie(); - MCSection *USection = TheU->getSection(); - Asm->OutStreamer->SwitchSection(USection); + for (const auto &TheU : CUs) + emitUnit(TheU.get(), UseOffsets); - TheU->emitHeader(UseOffsets); +void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) { + DIE &Die = TheU->getUnitDie(); + MCSection *USection = TheU->getSection(); + Asm->OutStreamer->SwitchSection(USection); - Asm->emitDwarfDIE(Die); - } + TheU->emitHeader(UseOffsets); + + Asm->emitDwarfDIE(Die); } // Compute the size and offset for each DIE. @@ -77,17 +81,20 @@ void DwarfFile::computeSizeAndOffsets() { // DIE within each compile unit. All offsets are CU relative. for (const auto &TheU : CUs) { TheU->setDebugInfoOffset(SecOffset); + SecOffset += computeSizeAndOffsetsForUnit(TheU.get()); + } } - // CU-relative offset is reset to 0 here. - unsigned Offset = sizeof(int32_t) + // Length of Unit Info - TheU->getHeaderSize(); // Unit-specific headers +unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) { + // CU-relative offset is reset to 0 here.
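// (Editor's note: the body continuing below is the old per-CU loop body
// factored out into computeSizeAndOffsetsForUnit(). The payoff is in the
// addDwarfTypeUnitType hunk earlier in this diff: a finished type unit can
// now be sized and emitted on the spot via computeSizeAndOffsetsForUnit()
// and emitUnit(), instead of waiting for the module-wide
// computeSizeAndOffsets()/emitUnits() pass.)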
+ unsigned Offset = sizeof(int32_t) + // Length of Unit Info + TheU->getHeaderSize(); // Unit-specific headers - // EndOffset here is CU-relative, after laying out - // all of the CU DIE. - unsigned EndOffset = computeSizeAndOffset(TheU->getUnitDie(), Offset); - SecOffset += EndOffset; - } + // The return value here is CU-relative, after laying out + // all of the CU DIE. + return computeSizeAndOffset(TheU->getUnitDie(), Offset); } + // Compute the size and offset of a DIE. The offset is relative to start of the // CU. It returns the offset after laying out the DIE. unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index 8402027..b73d89b 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -16,14 +16,15 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include <memory> -#include <string> #include <vector> namespace llvm { class AsmPrinter; class DbgVariable; +class DwarfCompileUnit; class DwarfUnit; class DIEAbbrev; class MCSymbol; @@ -46,7 +47,7 @@ class DwarfFile { std::vector<DIEAbbrev *> Abbreviations; // A pointer to all units in the section. - SmallVector<std::unique_ptr<DwarfUnit>, 1> CUs; + SmallVector<std::unique_ptr<DwarfCompileUnit>, 1> CUs; DwarfStringPool StrPool; @@ -66,7 +67,9 @@ public: ~DwarfFile(); - const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() { return CUs; } + const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() { + return CUs; + } /// \brief Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE &Die, unsigned Offset); @@ -74,6 +77,10 @@ public: /// \brief Compute the size and offset of all the DIEs. void computeSizeAndOffsets(); + /// \brief Compute the size and offset of all the DIEs in the given unit. + /// \returns The size of the root DIE. + unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU); + /// Define a unique number for the abbreviation. /// /// Compute the abbreviation for \c Die, look up its unique number, and @@ -81,12 +88,15 @@ public: DIEAbbrev &assignAbbrevNumber(DIE &Die); /// \brief Add a unit to the list of CUs. - void addUnit(std::unique_ptr<DwarfUnit> U); + void addUnit(std::unique_ptr<DwarfCompileUnit> U); /// \brief Emit all of the units to the section listed with the given /// abbreviation section. void emitUnits(bool UseOffsets); + /// \brief Emit the given unit to its section. + void emitUnit(DwarfUnit *U, bool UseOffsets); + /// \brief Emit a set of abbreviations to the specific section. 
void emitAbbrevs(MCSection *); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index d75fea5..4100d72 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -46,9 +46,8 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE) - : DwarfExpression(*AP.MF->getSubtarget().getRegisterInfo(), - AP.getDwarfDebug()->getDwarfVersion()), - AP(AP), DU(DU), DIE(DIE) {} + : DwarfExpression(AP.getDwarfDebug()->getDwarfVersion()), AP(AP), DU(DU), + DIE(DIE) {} void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) { DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); @@ -59,25 +58,24 @@ void DIEDwarfExpression::EmitSigned(int64_t Value) { void DIEDwarfExpression::EmitUnsigned(uint64_t Value) { DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); } -bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) { +bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, + unsigned MachineReg) { return MachineReg == TRI.getFrameRegister(*AP.MF); } -DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, - const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, - DwarfFile *DWU) - : UniqueID(UID), CUNode(Node), - UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), DebugInfoOffset(0), - Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) { +DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node, + AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) + : CUNode(Node), UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), Asm(A), + DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) { assert(UnitTag == dwarf::DW_TAG_compile_unit || UnitTag == dwarf::DW_TAG_type_unit); } -DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, +DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU, MCDwarfDwoLineTable *SplitLineTable) - : DwarfUnit(UID, dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), - CU(CU), SplitLineTable(SplitLineTable) { + : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU), + SplitLineTable(SplitLineTable) { if (SplitLineTable) addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0); } @@ -268,7 +266,7 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) { addDIEEntry(Die, Attribute, DIEEntry(Entry)); } -void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) { +void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) { // Flag the type unit reference as a declaration so that if it contains // members (implicit special members, static data member definitions, member // declarations for definitions in this CU, etc) consumers don't get confused @@ -276,7 +274,7 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) { addFlag(Die, dwarf::DW_AT_declaration); Die.addValue(DIEValueAllocator, dwarf::DW_AT_signature, - dwarf::DW_FORM_ref_sig8, DIETypeSignature(Type)); + dwarf::DW_FORM_ref_sig8, DIEInteger(Signature)); } void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute, @@ -370,14 +368,16 @@ void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) { bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, unsigned SizeInBits, unsigned OffsetInBits) { DIEDwarfExpression Expr(*Asm, *this, TheDie); - Expr.AddMachineRegPiece(Reg, SizeInBits, OffsetInBits); + 
Expr.AddMachineRegPiece(*Asm->MF->getSubtarget().getRegisterInfo(), Reg, + SizeInBits, OffsetInBits); return true; } bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset) { DIEDwarfExpression Expr(*Asm, *this, TheDie); - return Expr.AddMachineRegIndirect(Reg, Offset); + return Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(), + Reg, Offset); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -561,32 +561,6 @@ static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { Ty->getTag() == dwarf::DW_TAG_unspecified_type; } -/// If this type is derived from a base type then return base type size. -static uint64_t getBaseTypeSize(DwarfDebug *DD, const DIDerivedType *Ty) { - unsigned Tag = Ty->getTag(); - - if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && - Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && - Tag != dwarf::DW_TAG_restrict_type) - return Ty->getSizeInBits(); - - auto *BaseType = DD->resolve(Ty->getBaseType()); - - assert(BaseType && "Unexpected invalid base type"); - - // If this is a derived type, go ahead and get the base type, unless it's a - // reference then it's just the size of the field. Pointer types have no need - // of this since they're a different type of qualification on the type. - if (BaseType->getTag() == dwarf::DW_TAG_reference_type || - BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type) - return Ty->getSizeInBits(); - - if (auto *DT = dyn_cast<DIDerivedType>(BaseType)) - return getBaseTypeSize(DD, DT); - - return BaseType->getSizeInBits(); -} - void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock; @@ -667,7 +641,7 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { } void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { - if (!LinkageName.empty() && DD->useLinkageNames()) + if (!LinkageName.empty()) addString(Die, DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name : dwarf::DW_AT_MIPS_linkage_name, @@ -720,8 +694,6 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { return nullptr; auto *Ty = cast<DIType>(TyNode); - assert(Ty == resolve(Ty->getRef()) && - "type was not uniqued, possible ODR violation."); // DW_TAG_restrict_type is not supported in DWARF2 if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) @@ -903,6 +875,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) { Language == dwarf::DW_LANG_ObjC)) addFlag(Buffer, dwarf::DW_AT_prototyped); + // Add a DW_AT_calling_convention if this has an explicit convention. + if (CTy->getCC() && CTy->getCC() != dwarf::DW_CC_normal) + addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, + CTy->getCC()); + if (CTy->isLValueReference()) addFlag(Buffer, dwarf::DW_AT_reference); @@ -1050,14 +1027,18 @@ void DwarfUnit::constructTemplateValueParameterDIE( if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) addConstantValue(ParamDIE, CI, resolve(VP->getType())); else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { - // For declaration non-type template parameters (such as global values and - // functions) - DIELoc *Loc = new (DIEValueAllocator) DIELoc; - addOpAddress(*Loc, Asm->getSymbol(GV)); - // Emit DW_OP_stack_value to use the address as the immediate value of the - // parameter, rather than a pointer to it. 
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); - addBlock(ParamDIE, dwarf::DW_AT_location, Loc); + // We cannot describe the location of dllimport'd entities: the + // computation of their address requires loads from the IAT. + if (!GV->hasDLLImportStorageClass()) { + // For declaration non-type template parameters (such as global values + // and functions) + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + addOpAddress(*Loc, Asm->getSymbol(GV)); + // Emit DW_OP_stack_value to use the address as the immediate value of + // the parameter, rather than a pointer to it. + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(ParamDIE, dwarf::DW_AT_location, Loc); + } } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) { assert(isa<MDString>(Val)); addString(ParamDIE, dwarf::DW_AT_GNU_template_name, @@ -1171,7 +1152,9 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, assert(((LinkageName.empty() || DeclLinkageName.empty()) || LinkageName == DeclLinkageName) && "decl has a linkage name and it is different"); - if (DeclLinkageName.empty()) + if (DeclLinkageName.empty() && + // Always emit it for abstract subprograms. + (DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP))) addLinkageName(SPDie, LinkageName); if (!DeclDie) @@ -1207,9 +1190,16 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); + unsigned CC = 0; DITypeRefArray Args; - if (const DISubroutineType *SPTy = SP->getType()) + if (const DISubroutineType *SPTy = SP->getType()) { Args = SPTy->getTypeArray(); + CC = SPTy->getCC(); + } + + // Add a DW_AT_calling_convention if this has an explicit convention. + if (CC && CC != dwarf::DW_CC_normal) + addUInt(SPDie, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, CC); // Add a return type. If this is a type like a C/C++ void type we don't add a // return type. 
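For context, a minimal sketch of the kind of source that exercises the dllimport guard added above; the names here are hypothetical and only the shape matters:

    // A non-type template parameter bound to the address of a global.
    int GlobalCounter = 0;
    template <int *P> struct Tracker {};
    Tracker<&GlobalCounter> T;

    // For the DW_TAG_template_value_parameter describing P, the unit now
    // emits, roughly:
    //   DW_AT_location: DW_OP_addr <GlobalCounter>, DW_OP_stack_value
    // i.e. the address itself is the parameter's value, not a location to
    // dereference. If GlobalCounter carried dllimport storage class, its
    // address would require a load from the IAT at run time, so no
    // DW_AT_location would be emitted for it at all.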
@@ -1220,10 +1210,12 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, unsigned VK = SP->getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); - DIELoc *Block = getDIELoc(); - addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex()); - addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); + if (SP->getVirtualIndex() != -1u) { + DIELoc *Block = getDIELoc(); + addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); + } ContainingTypeMap.insert( std::make_pair(&SPDie, resolve(SP->getContainingType()))); } @@ -1242,11 +1234,13 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, if (!SP->isLocalToUnit()) addFlag(SPDie, dwarf::DW_AT_external); - if (SP->isOptimized()) - addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); + if (DD->useAppleExtensionAttributes()) { + if (SP->isOptimized()) + addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); - if (unsigned isa = Asm->getISAEncoding()) - addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + if (unsigned isa = Asm->getISAEncoding()) + addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + } if (SP->isLValueReference()) addFlag(SPDie, dwarf::DW_AT_reference); @@ -1388,58 +1382,49 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); } else { uint64_t Size = DT->getSizeInBits(); - uint64_t FieldSize = getBaseTypeSize(DD, DT); + uint64_t FieldSize = DD->getBaseTypeSize(DT); uint64_t OffsetInBytes; - if (FieldSize && Size != FieldSize) { + bool IsBitfield = FieldSize && Size != FieldSize; + if (IsBitfield) { // Handle bitfield, assume bytes are 8 bits. - addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); + if (DD->useDWARF2Bitfields()) + addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); - // - // The DWARF 2 DW_AT_bit_offset is counting the bits between the most - // significant bit of the aligned storage unit containing the bit field to - // the most significan bit of the bit field. - // - // FIXME: DWARF 4 states that DW_AT_data_bit_offset (which - // counts from the beginning, regardless of endianness) should - // be used instead. - // - // - // Struct Align Align Align - // v v v v - // +-----------+-----*-----+-----*-----+-- - // | ... |b1|b2|b3|b4| - // +-----------+-----*-----+-----*-----+-- - // | | |<-- Size ->| | - // |<---- Offset --->| |<--->| - // | | | \_ DW_AT_bit_offset (little endian) - // | |<--->| - // |<--------->| \_ StartBitOffset = DW_AT_bit_offset (big endian) - // \ = DW_AT_data_bit_offset (biendian) - // \_ OffsetInBytes + uint64_t Offset = DT->getOffsetInBits(); uint64_t Align = DT->getAlignInBits() ? DT->getAlignInBits() : FieldSize; uint64_t AlignMask = ~(Align - 1); // The bits from the start of the storage unit to the start of the field. uint64_t StartBitOffset = Offset - (Offset & AlignMask); - // The endian-dependent DWARF 2 offset. - uint64_t DwarfBitOffset = Asm->getDataLayout().isLittleEndian() - ? OffsetToAlignment(Offset + Size, Align) - : StartBitOffset; - // The byte offset of the field's aligned storage unit inside the struct. 
OffsetInBytes = (Offset - StartBitOffset) / 8; - addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, DwarfBitOffset); - } else + + if (DD->useDWARF2Bitfields()) { + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + uint64_t FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (Asm->getDataLayout().isLittleEndian()) + Offset = FieldSize - (Offset + Size); + + addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset); + OffsetInBytes = FieldOffset >> 3; + } else { + addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, None, Offset); + } + } else { // This is not a bitfield. OffsetInBytes = DT->getOffsetInBits() / 8; + } if (DD->getDwarfVersion() <= 2) { DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc; addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); - } else + } else if (!IsBitfield || DD->useDWARF2Bitfields()) addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, OffsetInBytes); } @@ -1524,8 +1509,11 @@ void DwarfUnit::emitHeader(bool UseOffsets) { // start of the section. Use a relocatable offset where needed to ensure // linking doesn't invalidate that offset. const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - Asm->emitDwarfSymbolReference(TLOF.getDwarfAbbrevSection()->getBeginSymbol(), - UseOffsets); + if (UseOffsets) + Asm->EmitInt32(0); + else + Asm->emitDwarfSymbolReference( + TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false); Asm->OutStreamer->AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 82760bf..e225f92 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -67,9 +67,6 @@ public: /// source file. class DwarfUnit { protected: - /// A numeric ID unique among all CUs in the module - unsigned UniqueID; - /// MDNode for the compile unit. const DICompileUnit *CUNode; @@ -79,9 +76,6 @@ protected: /// Unit debug information entry. DIE &UnitDie; - /// Offset of the UnitDie from beginning of debug info section. - unsigned DebugInfoOffset; - /// Target of Dwarf emission. AsmPrinter *Asm; @@ -110,8 +104,8 @@ protected: /// The section this unit will be emitted in. MCSection *Section; - DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, - DwarfDebug *DW, DwarfFile *DWU); + DwarfUnit(dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU); bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); @@ -127,14 +121,10 @@ public: // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } - unsigned getUniqueID() const { return UniqueID; } uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } const DICompileUnit *getCUNode() const { return CUNode; } DIE &getUnitDie() { return UnitDie; } - unsigned getDebugInfoOffset() const { return DebugInfoOffset; } - void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } - /// Return true if this compile unit has something to write out. bool hasContent() const { return UnitDie.hasChildren(); } @@ -221,7 +211,7 @@ public: void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry); /// Add a type's DW_AT_signature and set the declaration flag. 
- void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type); + void addDIETypeSignature(DIE &Die, uint64_t Signature); /// Add an attribute containing the type signature for a unique identifier. void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute, StringRef Identifier); @@ -338,7 +328,7 @@ protected: /// Look in the DwarfDebug map for the MDNode that corresponds to the /// reference. template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { - return DD->resolve(Ref); + return Ref.resolve(); } private: @@ -383,12 +373,10 @@ class DwarfTypeUnit : public DwarfUnit { bool isDwoUnit() const override; public: - DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, - DwarfDebug *DW, DwarfFile *DWU, - MCDwarfDwoLineTable *SplitLineTable = nullptr); + DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU, MCDwarfDwoLineTable *SplitLineTable = nullptr); void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; } - uint64_t getTypeSignature() const { return TypeSignature; } void setType(const DIE *Ty) { this->Ty = Ty; } /// Emit the header for this unit, not including the initial length field. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h index c6a0e9d..080fdd1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -22,7 +22,6 @@ struct LandingPadInfo; class MachineModuleInfo; class MachineInstr; class MachineFunction; -class AsmPrinter; class MCSymbol; class MCSymbolRefExpr; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp deleted file mode 100644 index 1e2f55b..0000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ /dev/null @@ -1,411 +0,0 @@ -//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp --*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing line tables info into COFF files. -// -//===----------------------------------------------------------------------===// - -#include "WinCodeViewLineTables.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Support/COFF.h" - -namespace llvm { - -StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { - assert(S); - assert((isa<DICompileUnit>(S) || isa<DIFile>(S) || isa<DISubprogram>(S) || - isa<DILexicalBlockBase>(S)) && - "Unexpected scope info"); - - auto *Scope = cast<DIScope>(S); - StringRef Dir = Scope->getDirectory(), - Filename = Scope->getFilename(); - std::string &Filepath = - DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; - if (!Filepath.empty()) - return Filepath; - - // Clang emits directory and relative filename info into the IR, but CodeView - // operates on full paths. We could change Clang to emit full paths too, but - // that would increase the IR size and probably not needed for other users. - // For now, just concatenate and canonicalize the path here. - if (Filename.find(':') == 1) - Filepath = Filename; - else - Filepath = (Dir + "\\" + Filename).str(); - - // Canonicalize the path. We have to do it textually because we may no longer - // have access the file in the filesystem. 
- // First, replace all slashes with backslashes. - std::replace(Filepath.begin(), Filepath.end(), '/', '\\'); - - // Remove all "\.\" with "\". - size_t Cursor = 0; - while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos) - Filepath.erase(Cursor, 2); - - // Replace all "\XXX\..\" with "\". Don't try too hard though as the original - // path should be well-formatted, e.g. start with a drive letter, etc. - Cursor = 0; - while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) { - // Something's wrong if the path starts with "\..\", abort. - if (Cursor == 0) - break; - - size_t PrevSlash = Filepath.rfind('\\', Cursor - 1); - if (PrevSlash == std::string::npos) - // Something's wrong, abort. - break; - - Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash); - // The next ".." might be following the one we've just erased. - Cursor = PrevSlash; - } - - // Remove all duplicate backslashes. - Cursor = 0; - while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) - Filepath.erase(Cursor, 1); - - return Filepath; -} - -void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, - const MachineFunction *MF) { - const MDNode *Scope = DL.getScope(); - if (!Scope) - return; - unsigned LineNumber = DL.getLine(); - // Skip this line if it is longer than the maximum we can record. - if (LineNumber > COFF::CVL_MaxLineNumber) - return; - - unsigned ColumnNumber = DL.getCol(); - // Truncate the column number if it is longer than the maximum we can record. - if (ColumnNumber > COFF::CVL_MaxColumnNumber) - ColumnNumber = 0; - - StringRef Filename = getFullFilepath(Scope); - - // Skip this instruction if it has the same file:line as the previous one. - assert(CurFn); - if (!CurFn->Instrs.empty()) { - const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()]; - if (LastInstr.Filename == Filename && LastInstr.LineNumber == LineNumber && - LastInstr.ColumnNumber == ColumnNumber) - return; - } - FileNameRegistry.add(Filename); - - MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol(); - Asm->OutStreamer->EmitLabel(MCL); - CurFn->Instrs.push_back(MCL); - InstrInfo[MCL] = InstrInfoTy(Filename, LineNumber, ColumnNumber); -} - -WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) - : Asm(nullptr), CurFn(nullptr) { - MachineModuleInfo *MMI = AP->MMI; - - // If module doesn't have named metadata anchors or COFF debug section - // is not available, skip any debug info related stuff. - if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || - !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) - return; - - // Tell MMI that we have debug info. - MMI->setDebugInfoAvailability(true); - Asm = AP; -} - -void WinCodeViewLineTables::endModule() { - if (FnDebugInfo.empty()) - return; - - assert(Asm != nullptr); - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); - Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); - - // The COFF .debug$S section consists of several subsections, each starting - // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length - // of the payload followed by the payload itself. The subsections are 4-byte - // aligned. - - // Emit per-function debug information. This code is extracted into a - // separate function for readability. - for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I) - emitDebugInfoForFunction(VisitedFunctions[I]); - - // This subsection holds a file index to offset in string table table. 
- Asm->OutStreamer->AddComment("File index to string table offset subsection"); - Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION); - size_t NumFilenames = FileNameRegistry.Infos.size(); - Asm->EmitInt32(8 * NumFilenames); - for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { - StringRef Filename = FileNameRegistry.Filenames[I]; - // For each unique filename, just write its offset in the string table. - Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset); - // The function name offset is not followed by any additional data. - Asm->EmitInt32(0); - } - - // This subsection holds the string table. - Asm->OutStreamer->AddComment("String table"); - Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION); - Asm->EmitInt32(FileNameRegistry.LastOffset); - // The payload starts with a null character. - Asm->EmitInt8(0); - - for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { - // Just emit unique filenames one by one, separated by a null character. - Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]); - Asm->EmitInt8(0); - } - - // No more subsections. Fill with zeros to align the end of the section by 4. - Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0); - - clear(); -} - -static void EmitLabelDiff(MCStreamer &Streamer, - const MCSymbol *From, const MCSymbol *To, - unsigned int Size = 4) { - MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - MCContext &Context = Streamer.getContext(); - const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context), - *ToRef = MCSymbolRefExpr::create(To, Variant, Context); - const MCExpr *AddrDelta = - MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context); - Streamer.EmitValue(AddrDelta, Size); -} - -void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { - // For each function there is a separate subsection - // which holds the PC to file:line table. - const MCSymbol *Fn = Asm->getSymbol(GV); - assert(Fn); - - const FunctionInfo &FI = FnDebugInfo[GV]; - if (FI.Instrs.empty()) - return; - assert(FI.End && "Don't know where the function ends?"); - - StringRef GVName = GV->getName(); - StringRef FuncName; - if (auto *SP = getDISubprogram(GV)) - FuncName = SP->getDisplayName(); - - // FIXME Clang currently sets DisplayName to "bar" for a C++ - // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying - // to demangle display names anyways, so let's just put a mangled name into - // the symbols subsection until Clang gives us what we need. - if (GVName.startswith("\01?")) - FuncName = GVName.substr(1); - // Emit a symbol subsection, required by VS2012+ to find function boundaries. - MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(), - *SymbolsEnd = Asm->MMI->getContext().createTempSymbol(); - Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName)); - Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION); - EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd); - Asm->OutStreamer->EmitLabel(SymbolsBegin); - { - MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(), - *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol(); - EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2); - Asm->OutStreamer->EmitLabel(ProcSegmentBegin); - - Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START); - // Some bytes of this segment don't seem to be required for basic debugging, - // so just fill them with zeroes. 
- Asm->OutStreamer->EmitFill(12, 0); - // This is the important bit that tells the debugger where the function - // code is located and what's its size: - EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); - Asm->OutStreamer->EmitFill(12, 0); - Asm->OutStreamer->EmitCOFFSecRel32(Fn); - Asm->OutStreamer->EmitCOFFSectionIndex(Fn); - Asm->EmitInt8(0); - // Emit the function display name as a null-terminated string. - Asm->OutStreamer->EmitBytes(FuncName); - Asm->EmitInt8(0); - Asm->OutStreamer->EmitLabel(ProcSegmentEnd); - - // We're done with this function. - Asm->EmitInt16(0x0002); - Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END); - } - Asm->OutStreamer->EmitLabel(SymbolsEnd); - // Every subsection must be aligned to a 4-byte boundary. - Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0); - - // PCs/Instructions are grouped into segments sharing the same filename. - // Pre-calculate the lengths (in instructions) of these segments and store - // them in a map for convenience. Each index in the map is the sequential - // number of the respective instruction that starts a new segment. - DenseMap<size_t, size_t> FilenameSegmentLengths; - size_t LastSegmentEnd = 0; - StringRef PrevFilename = InstrInfo[FI.Instrs[0]].Filename; - for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) { - if (PrevFilename == InstrInfo[FI.Instrs[J]].Filename) - continue; - FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd; - LastSegmentEnd = J; - PrevFilename = InstrInfo[FI.Instrs[J]].Filename; - } - FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd; - - // Emit a line table subsection, required to do PC-to-file:line lookup. - Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName)); - Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION); - MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(), - *LineTableEnd = Asm->MMI->getContext().createTempSymbol(); - EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd); - Asm->OutStreamer->EmitLabel(LineTableBegin); - - // Identify the function this subsection is for. - Asm->OutStreamer->EmitCOFFSecRel32(Fn); - Asm->OutStreamer->EmitCOFFSectionIndex(Fn); - // Insert flags after a 16-bit section index. - Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS); - - // Length of the function's code, in bytes. - EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); - - // PC-to-linenumber lookup table: - MCSymbol *FileSegmentEnd = nullptr; - - // The start of the last segment: - size_t LastSegmentStart = 0; - - auto FinishPreviousChunk = [&] { - if (!FileSegmentEnd) - return; - for (size_t ColSegI = LastSegmentStart, - ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart]; - ColSegI != ColSegEnd; ++ColSegI) { - unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber; - assert(ColumnNumber <= COFF::CVL_MaxColumnNumber); - Asm->EmitInt16(ColumnNumber); // Start column - Asm->EmitInt16(0); // End column - } - Asm->OutStreamer->EmitLabel(FileSegmentEnd); - }; - - for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) { - MCSymbol *Instr = FI.Instrs[J]; - assert(InstrInfo.count(Instr)); - - if (FilenameSegmentLengths.count(J)) { - // We came to a beginning of a new filename segment. - FinishPreviousChunk(); - StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename; - assert(FileNameRegistry.Infos.count(CurFilename)); - size_t IndexInStringTable = - FileNameRegistry.Infos[CurFilename].FilenameID; - // Each segment starts with the offset of the filename - // in the string table. 
- Asm->OutStreamer->AddComment( - "Segment for file '" + Twine(CurFilename) + "' begins"); - MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol(); - Asm->OutStreamer->EmitLabel(FileSegmentBegin); - Asm->EmitInt32(8 * IndexInStringTable); - - // Number of PC records in the lookup table. - size_t SegmentLength = FilenameSegmentLengths[J]; - Asm->EmitInt32(SegmentLength); - - // Full size of the segment for this filename, including the prev two - // records. - FileSegmentEnd = Asm->MMI->getContext().createTempSymbol(); - EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); - LastSegmentStart = J; - } - - // The first PC with the given linenumber and the linenumber itself. - EmitLabelDiff(*Asm->OutStreamer, Fn, Instr); - uint32_t LineNumber = InstrInfo[Instr].LineNumber; - assert(LineNumber <= COFF::CVL_MaxLineNumber); - uint32_t LineData = LineNumber | COFF::CVL_IsStatement; - Asm->EmitInt32(LineData); - } - - FinishPreviousChunk(); - Asm->OutStreamer->EmitLabel(LineTableEnd); -} - -void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { - assert(!CurFn && "Can't process two functions at once!"); - - if (!Asm || !Asm->MMI->hasDebugInfo()) - return; - - const Function *GV = MF->getFunction(); - assert(FnDebugInfo.count(GV) == false); - VisitedFunctions.push_back(GV); - CurFn = &FnDebugInfo[GV]; - - // Find the end of the function prolog. - // FIXME: is there a simpler a way to do this? Can we just search - // for the first instruction of the function, not the last of the prolog? - DebugLoc PrologEndLoc; - bool EmptyPrologue = true; - for (const auto &MBB : *MF) { - if (PrologEndLoc) - break; - for (const auto &MI : MBB) { - if (MI.isDebugValue()) - continue; - - // First known non-DBG_VALUE and non-frame setup location marks - // the beginning of the function body. - // FIXME: do we need the first subcondition? - if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { - PrologEndLoc = MI.getDebugLoc(); - break; - } - EmptyPrologue = false; - } - } - // Record beginning of function if we have a non-empty prologue. - if (PrologEndLoc && !EmptyPrologue) { - DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); - maybeRecordLocation(FnStartDL, MF); - } -} - -void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { - if (!Asm || !CurFn) // We haven't created any debug info for this function. - return; - - const Function *GV = MF->getFunction(); - assert(FnDebugInfo.count(GV)); - assert(CurFn == &FnDebugInfo[GV]); - - if (CurFn->Instrs.empty()) { - FnDebugInfo.erase(GV); - VisitedFunctions.pop_back(); - } else { - CurFn->End = Asm->getFunctionEnd(); - } - CurFn = nullptr; -} - -void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) { - // Ignore DBG_VALUE locations and function prologue. - if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) - return; - DebugLoc DL = MI->getDebugLoc(); - if (DL == PrevInstLoc || !DL) - return; - maybeRecordLocation(DL, Asm->MF); -} -} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h deleted file mode 100644 index 78068e0..0000000 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ /dev/null @@ -1,138 +0,0 @@ -//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h ----*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains support for writing line tables info into COFF files. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H -#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H - -#include "AsmPrinterHandler.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/LexicalScopes.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Target/TargetLoweringObjectFile.h" - -namespace llvm { -/// \brief Collects and handles line tables information in a CodeView format. -class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler { - AsmPrinter *Asm; - DebugLoc PrevInstLoc; - - // For each function, store a vector of labels to its instructions, as well as - // to the end of the function. - struct FunctionInfo { - SmallVector<MCSymbol *, 10> Instrs; - MCSymbol *End; - FunctionInfo() : End(nullptr) {} - } *CurFn; - - typedef DenseMap<const Function *, FunctionInfo> FnDebugInfoTy; - FnDebugInfoTy FnDebugInfo; - // Store the functions we've visited in a vector so we can maintain a stable - // order while emitting subsections. - SmallVector<const Function *, 10> VisitedFunctions; - - // InstrInfoTy - Holds the Filename:LineNumber information for every - // instruction with a unique debug location. - struct InstrInfoTy { - StringRef Filename; - unsigned LineNumber; - unsigned ColumnNumber; - - InstrInfoTy() : LineNumber(0), ColumnNumber(0) {} - - InstrInfoTy(StringRef Filename, unsigned LineNumber, unsigned ColumnNumber) - : Filename(Filename), LineNumber(LineNumber), - ColumnNumber(ColumnNumber) {} - }; - DenseMap<MCSymbol *, InstrInfoTy> InstrInfo; - - // FileNameRegistry - Manages filenames observed while generating debug info - // by filtering out duplicates and bookkeeping the offsets in the string - // table to be generated. - struct FileNameRegistryTy { - SmallVector<StringRef, 10> Filenames; - struct PerFileInfo { - size_t FilenameID, StartOffset; - }; - StringMap<PerFileInfo> Infos; - - // The offset in the string table where we'll write the next unique - // filename. - size_t LastOffset; - - FileNameRegistryTy() { - clear(); - } - - // Add Filename to the registry, if it was not observed before. 
- void add(StringRef Filename) { - if (Infos.count(Filename)) - return; - size_t OldSize = Infos.size(); - Infos[Filename].FilenameID = OldSize; - Infos[Filename].StartOffset = LastOffset; - LastOffset += Filename.size() + 1; - Filenames.push_back(Filename); - } - - void clear() { - LastOffset = 1; - Infos.clear(); - Filenames.clear(); - } - } FileNameRegistry; - - typedef std::map<std::pair<StringRef, StringRef>, std::string> - DirAndFilenameToFilepathMapTy; - DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap; - StringRef getFullFilepath(const MDNode *S); - - void maybeRecordLocation(DebugLoc DL, const MachineFunction *MF); - - void clear() { - assert(CurFn == nullptr); - FileNameRegistry.clear(); - InstrInfo.clear(); - } - - void emitDebugInfoForFunction(const Function *GV); - -public: - WinCodeViewLineTables(AsmPrinter *Asm); - - void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} - - /// \brief Emit the COFF section that holds the line table information. - void endModule() override; - - /// \brief Gather pre-function debug information. - void beginFunction(const MachineFunction *MF) override; - - /// \brief Gather post-function debug information. - void endFunction(const MachineFunction *) override; - - /// \brief Process beginning of an instruction. - void beginInstruction(const MachineInstr *MI) override; - - /// \brief Process end of an instruction. - void endInstruction() override {} -}; -} // End of namespace llvm - -#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp index 4da5b58..e5933d8 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "WinException.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -35,6 +34,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -125,10 +125,9 @@ void WinException::endFunction(const MachineFunction *MF) { if (shouldEmitPersonality || shouldEmitLSDA) { Asm->OutStreamer->PushSection(); - // Just switch sections to the right xdata section. This use of CurrentFnSym - // assumes that we only emit the LSDA when ending the parent function. - MCSection *XData = WinEH::UnwindEmitter::getXDataSection(Asm->CurrentFnSym, - Asm->OutContext); + // Just switch sections to the right xdata section. + MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( + Asm->OutStreamer->getCurrentSectionOnly()); Asm->OutStreamer->SwitchSection(XData); // Emit the tables appropriate to the personality function in use. 
If we @@ -303,8 +302,17 @@ int WinException::getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo) { const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering(); unsigned UnusedReg; - if (Asm->MAI->usesWindowsCFI()) - return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg); + if (Asm->MAI->usesWindowsCFI()) { + int Offset = + TFI.getFrameIndexReferencePreferSP(*Asm->MF, FrameIndex, UnusedReg, + /*IgnoreSPUpdates*/ true); + assert(UnusedReg == + Asm->MF->getSubtarget() + .getTargetLowering() + ->getStackPointerRegisterToSaveRestore()); + return Offset; + } + // For 32-bit, offsets should be relative to the end of the EH registration // node. For 64-bit, it's relative to SP at the end of the prologue. assert(FuncInfo.EHRegNodeEndOffset != INT_MAX); @@ -793,6 +801,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { const MCExpr *FrameAllocOffsetRef = nullptr; if (HT.CatchObj.FrameIndex != INT_MAX) { int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo); + assert(Offset != 0 && "Illegal offset for catch object!"); FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext); } else { FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext); @@ -945,15 +954,42 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // ScopeTableEntry ScopeRecord[]; // }; // - // Only the EHCookieOffset field appears to vary, and it appears to be the - // offset from the final saved SP value to the retaddr. + // Offsets are %ebp relative. + // + // The GS cookie is present only if the function needs stack protection. + // GSCookieOffset = -2 means that GS cookie is not used. + // + // The EH cookie is always present. + // + // The check is done as follows: + // (ebp+CookieXOROffset) ^ [ebp+CookieOffset] == _security_cookie + + // Retrieve the Guard Stack slot. + int GSCookieOffset = -2; + const MachineFrameInfo *MFI = MF->getFrameInfo(); + if (MFI->hasStackProtectorIndex()) { + unsigned UnusedReg; + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + int SSPIdx = MFI->getStackProtectorIndex(); + GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg); + } + + // Retrieve the EH Guard slot. + // TODO(etienneb): Get rid of this value and replace it with an assertion. + int EHCookieOffset = 9999; + if (FuncInfo.EHGuardFrameIndex != INT_MAX) { + unsigned UnusedReg; + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + int EHGuardIdx = FuncInfo.EHGuardFrameIndex; + EHCookieOffset = TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg); + } + AddComment("GSCookieOffset"); - OS.EmitIntValue(-2, 4); + OS.EmitIntValue(GSCookieOffset, 4); AddComment("GSCookieXOROffset"); OS.EmitIntValue(0, 4); - // FIXME: Calculate. AddComment("EHCookieOffset"); - OS.EmitIntValue(9999, 4); + OS.EmitIntValue(EHCookieOffset, 4); AddComment("EHCookieXOROffset"); OS.EmitIntValue(0, 4); BaseState = -2; diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp index d12fdb2..bf5cf10 100644 --- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// // // This file contains a pass (at IR level) to replace atomic instructions with -// target specific instruction which implement the same semantics in a way -// which better fits the target backend. 
This can include the use of either -// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or -// type coercions. +// __atomic_* library calls, or target-specific instructions which implement +// the same semantics in a way which better fits the target backend. This can +// include the use of (intrinsic-based) load-linked/store-conditional loops, +// AtomicCmpXchg, or type coercions. // //===----------------------------------------------------------------------===// @@ -57,25 +57,121 @@ namespace { StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); bool expandAtomicStore(StoreInst *SI); bool tryExpandAtomicRMW(AtomicRMWInst *AI); - bool expandAtomicOpToLLSC( - Instruction *I, Value *Addr, AtomicOrdering MemOpOrder, - std::function<Value *(IRBuilder<> &, Value *)> PerformOp); + Value * + insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr, + AtomicOrdering MemOpOrder, + function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); + void expandAtomicOpToLLSC( + Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder, + function_ref<Value *(IRBuilder<> &, Value *)> PerformOp); + void expandPartwordAtomicRMW( + AtomicRMWInst *I, + TargetLoweringBase::AtomicExpansionKind ExpansionKind); + void expandPartwordCmpXchg(AtomicCmpXchgInst *I); + + AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI); + static Value *insertRMWCmpXchgLoop( + IRBuilder<> &Builder, Type *ResultType, Value *Addr, + AtomicOrdering MemOpOrder, + function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, + CreateCmpXchgInstFun CreateCmpXchg); + bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); bool isIdempotentRMW(AtomicRMWInst *AI); bool simplifyIdempotentRMW(AtomicRMWInst *AI); + + bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align, + Value *PointerOperand, Value *ValueOperand, + Value *CASExpected, AtomicOrdering Ordering, + AtomicOrdering Ordering2, + ArrayRef<RTLIB::Libcall> Libcalls); + void expandAtomicLoadToLibcall(LoadInst *LI); + void expandAtomicStoreToLibcall(StoreInst *LI); + void expandAtomicRMWToLibcall(AtomicRMWInst *I); + void expandAtomicCASToLibcall(AtomicCmpXchgInst *I); + + friend bool + llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg); }; } char AtomicExpand::ID = 0; char &llvm::AtomicExpandID = AtomicExpand::ID; -INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", - "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg", - false, false) +INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions", + false, false) FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { return new AtomicExpand(TM); } +namespace { +// Helper functions to retrieve the size of atomic instructions. 
+unsigned getAtomicOpSize(LoadInst *LI) { + const DataLayout &DL = LI->getModule()->getDataLayout(); + return DL.getTypeStoreSize(LI->getType()); +} + +unsigned getAtomicOpSize(StoreInst *SI) { + const DataLayout &DL = SI->getModule()->getDataLayout(); + return DL.getTypeStoreSize(SI->getValueOperand()->getType()); +} + +unsigned getAtomicOpSize(AtomicRMWInst *RMWI) { + const DataLayout &DL = RMWI->getModule()->getDataLayout(); + return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); +} + +unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) { + const DataLayout &DL = CASI->getModule()->getDataLayout(); + return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); +} + +// Helper functions to retrieve the alignment of atomic instructions. +unsigned getAtomicOpAlign(LoadInst *LI) { + unsigned Align = LI->getAlignment(); + // In the future, if this IR restriction is relaxed, we should + // return DataLayout::getABITypeAlignment when there's no align + // value. + assert(Align != 0 && "An atomic LoadInst always has an explicit alignment"); + return Align; +} + +unsigned getAtomicOpAlign(StoreInst *SI) { + unsigned Align = SI->getAlignment(); + // In the future, if this IR restriction is relaxed, we should + // return DataLayout::getABITypeAlignment when there's no align + // value. + assert(Align != 0 && "An atomic StoreInst always has an explicit alignment"); + return Align; +} + +unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) { + // TODO(PR27168): This instruction has no alignment attribute, but unlike the + // default alignment for load/store, the default here is to assume + // it has NATURAL alignment, not DataLayout-specified alignment. + const DataLayout &DL = RMWI->getModule()->getDataLayout(); + return DL.getTypeStoreSize(RMWI->getValOperand()->getType()); +} + +unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) { + // TODO(PR27168): same comment as above. + const DataLayout &DL = CASI->getModule()->getDataLayout(); + return DL.getTypeStoreSize(CASI->getCompareOperand()->getType()); +} + +// Determine if a particular atomic operation has a supported size, +// and is of appropriate alignment, to be passed through for target +// lowering. (Versus turning into a __atomic libcall) +template <typename Inst> +bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { + unsigned Size = getAtomicOpSize(I); + unsigned Align = getAtomicOpAlign(I); + return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8; +} + +} // end anonymous namespace + bool AtomicExpand::runOnFunction(Function &F) { if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand()) return false; @@ -85,9 +181,10 @@ bool AtomicExpand::runOnFunction(Function &F) { // Changing control-flow while iterating through it is a bad idea, so gather a // list of all atomic instructions before we start. 
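As a rough illustration of the gating that atomicSizeSupported performs above, the sketch below restates the predicate as plain C++ for a hypothetical target whose getMaxAtomicSizeInBitsSupported() returns 64; the names and values are invented for the example:

    #include <cassert>

    // Restatement of the Align >= Size && Size <= MaxSizeInBits / 8 test;
    // MaxSizeInBits stands in for TLI->getMaxAtomicSizeInBitsSupported().
    static bool sizeSupported(unsigned SizeBytes, unsigned AlignBytes,
                              unsigned MaxSizeInBits) {
      return AlignBytes >= SizeBytes && SizeBytes <= MaxSizeInBits / 8;
    }

    int main() {
      assert(sizeSupported(4, 4, 64));    // aligned i32: lowered in-line
      assert(!sizeSupported(16, 16, 64)); // i128 > 64-bit max: __atomic_* call
      assert(!sizeSupported(4, 2, 64));   // under-aligned i32: __atomic_* call
    }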
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - if (I->isAtomic()) - AtomicInsts.push_back(&*I); + for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { + Instruction *I = &*II; + if (I->isAtomic() && !isa<FenceInst>(I)) + AtomicInsts.push_back(I); } bool MadeChange = false; @@ -96,41 +193,67 @@ bool AtomicExpand::runOnFunction(Function &F) { auto SI = dyn_cast<StoreInst>(I); auto RMWI = dyn_cast<AtomicRMWInst>(I); auto CASI = dyn_cast<AtomicCmpXchgInst>(I); - assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) && - "Unknown atomic instruction"); + assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction"); + + // If the Size/Alignment is not supported, replace with a libcall. + if (LI) { + if (!atomicSizeSupported(TLI, LI)) { + expandAtomicLoadToLibcall(LI); + MadeChange = true; + continue; + } + } else if (SI) { + if (!atomicSizeSupported(TLI, SI)) { + expandAtomicStoreToLibcall(SI); + MadeChange = true; + continue; + } + } else if (RMWI) { + if (!atomicSizeSupported(TLI, RMWI)) { + expandAtomicRMWToLibcall(RMWI); + MadeChange = true; + continue; + } + } else if (CASI) { + if (!atomicSizeSupported(TLI, CASI)) { + expandAtomicCASToLibcall(CASI); + MadeChange = true; + continue; + } + } - auto FenceOrdering = Monotonic; - bool IsStore, IsLoad; - if (TLI->getInsertFencesForAtomic()) { - if (LI && isAtLeastAcquire(LI->getOrdering())) { + if (TLI->shouldInsertFencesForAtomic(I)) { + auto FenceOrdering = AtomicOrdering::Monotonic; + bool IsStore, IsLoad; + if (LI && isAcquireOrStronger(LI->getOrdering())) { FenceOrdering = LI->getOrdering(); - LI->setOrdering(Monotonic); + LI->setOrdering(AtomicOrdering::Monotonic); IsStore = false; IsLoad = true; - } else if (SI && isAtLeastRelease(SI->getOrdering())) { + } else if (SI && isReleaseOrStronger(SI->getOrdering())) { FenceOrdering = SI->getOrdering(); - SI->setOrdering(Monotonic); + SI->setOrdering(AtomicOrdering::Monotonic); IsStore = true; IsLoad = false; - } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) || - isAtLeastAcquire(RMWI->getOrdering()))) { + } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) || + isAcquireOrStronger(RMWI->getOrdering()))) { FenceOrdering = RMWI->getOrdering(); - RMWI->setOrdering(Monotonic); + RMWI->setOrdering(AtomicOrdering::Monotonic); IsStore = IsLoad = true; } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) && - (isAtLeastRelease(CASI->getSuccessOrdering()) || - isAtLeastAcquire(CASI->getSuccessOrdering()))) { + (isReleaseOrStronger(CASI->getSuccessOrdering()) || + isAcquireOrStronger(CASI->getSuccessOrdering()))) { // If a compare and swap is lowered to LL/SC, we can do smarter fence // insertion, with a stronger one on the success path than on the // failure path. As a result, fence insertion is directly done by // expandAtomicCmpXchg in that case. 
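The fence bracketing described above can be pictured with a C++ analogue; this is a conceptual sketch of the transformation, not the IR the pass emits:

    #include <atomic>

    // An acquire load becomes a monotonic (relaxed) load followed by a
    // trailing acquire fence; a release store gets a leading release fence.
    int loadAcquireLowered(std::atomic<int> &A) {
      int V = A.load(std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_acquire);
      return V;
    }

    void storeReleaseLowered(std::atomic<int> &A, int V) {
      std::atomic_thread_fence(std::memory_order_release);
      A.store(V, std::memory_order_relaxed);
    }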
FenceOrdering = CASI->getSuccessOrdering(); - CASI->setSuccessOrdering(Monotonic); - CASI->setFailureOrdering(Monotonic); + CASI->setSuccessOrdering(AtomicOrdering::Monotonic); + CASI->setFailureOrdering(AtomicOrdering::Monotonic); IsStore = IsLoad = true; } - if (FenceOrdering != Monotonic) { + if (FenceOrdering != AtomicOrdering::Monotonic) { MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad); } } @@ -143,7 +266,7 @@ bool AtomicExpand::runOnFunction(Function &F) { assert(LI->getType()->isIntegerTy() && "invariant broken"); MadeChange = true; } - + MadeChange |= tryExpandAtomicLoad(LI); } else if (SI) { if (SI->getValueOperand()->getType()->isFloatingPointTy()) { @@ -168,8 +291,30 @@ bool AtomicExpand::runOnFunction(Function &F) { } else { MadeChange |= tryExpandAtomicRMW(RMWI); } - } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) { - MadeChange |= expandAtomicCmpXchg(CASI); + } else if (CASI) { + // TODO: when we're ready to make the change at the IR level, we can + // extend convertCmpXchgToInteger for floating point too. + assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() && + "unimplemented - floating point not legal at IR level"); + if (CASI->getCompareOperand()->getType()->isPointerTy() ) { + // TODO: add a TLI hook to control this so that each target can + // convert to lowering the original type one at a time. + CASI = convertCmpXchgToIntegerType(CASI); + assert(CASI->getCompareOperand()->getType()->isIntegerTy() && + "invariant broken"); + MadeChange = true; + } + + unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; + unsigned ValueSize = getAtomicOpSize(CASI); + if (ValueSize < MinCASSize) { + assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) && + "MinCmpXchgSizeInBits not yet supported for LL/SC expansions."); + expandPartwordCmpXchg(CASI); + } else { + if (TLI->shouldExpandAtomicCmpXchgInIR(CASI)) + MadeChange |= expandAtomicCmpXchg(CASI); + } } } return MadeChange; @@ -206,7 +351,7 @@ IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, } /// Convert an atomic load of a non-integral type to an integer load of the -/// equivelent bitwidth. See the function comment on +/// equivalent bitwidth. See the function comment on /// convertAtomicStoreToIntegerType for background. LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *M = LI->getModule(); @@ -237,9 +382,10 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: - return expandAtomicOpToLLSC( - LI, LI->getPointerOperand(), LI->getOrdering(), + expandAtomicOpToLLSC( + LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(), [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); + return true; case TargetLoweringBase::AtomicExpansionKind::LLOnly: return expandAtomicLoadToLL(LI); case TargetLoweringBase::AtomicExpansionKind::CmpXChg: @@ -283,7 +429,7 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { } /// Convert an atomic store of a non-integral type to an integer store of the -/// equivelent bitwidth. We used to not support floating point or vector +/// equivalent bitwidth. We used to not support floating point or vector /// atomics in the IR at all. The backends learned to deal with the bitcast /// idiom because that was the only way of expressing the notion of a atomic /// float or vector store. 
The long term plan is to teach each backend to @@ -380,32 +526,353 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { switch (TLI->shouldExpandAtomicRMWInIR(AI)) { case TargetLoweringBase::AtomicExpansionKind::None: return false; - case TargetLoweringBase::AtomicExpansionKind::LLSC: - return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(), - [&](IRBuilder<> &Builder, Value *Loaded) { - return performAtomicOp(AI->getOperation(), - Builder, Loaded, - AI->getValOperand()); - }); - case TargetLoweringBase::AtomicExpansionKind::CmpXChg: - return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); + case TargetLoweringBase::AtomicExpansionKind::LLSC: { + unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; + unsigned ValueSize = getAtomicOpSize(AI); + if (ValueSize < MinCASSize) { + llvm_unreachable( + "MinCmpXchgSizeInBits not yet supported for LL/SC architectures."); + } else { + auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) { + return performAtomicOp(AI->getOperation(), Builder, Loaded, + AI->getValOperand()); + }; + expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(), + AI->getOrdering(), PerformOp); + } + return true; + } + case TargetLoweringBase::AtomicExpansionKind::CmpXChg: { + unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; + unsigned ValueSize = getAtomicOpSize(AI); + if (ValueSize < MinCASSize) { + expandPartwordAtomicRMW(AI, + TargetLoweringBase::AtomicExpansionKind::CmpXChg); + } else { + expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); + } + return true; + } default: llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } } -bool AtomicExpand::expandAtomicOpToLLSC( - Instruction *I, Value *Addr, AtomicOrdering MemOpOrder, - std::function<Value *(IRBuilder<> &, Value *)> PerformOp) { +namespace { + +/// Result values from createMaskInstrs helper. +struct PartwordMaskValues { + Type *WordType; + Type *ValueType; + Value *AlignedAddr; + Value *ShiftAmt; + Value *Mask; + Value *Inv_Mask; +}; +} // end anonymous namespace + +/// This is a helper function which builds instructions to provide +/// values necessary for partword atomic operations. It takes an +/// incoming address, Addr, and ValueType, and constructs the address, +/// shift-amounts and masks needed to work with a larger value of size +/// WordSize. +/// +/// AlignedAddr: Addr rounded down to a multiple of WordSize +/// +/// ShiftAmt: Number of bits to right-shift a WordSize value loaded +/// from AlignAddr for it to have the same value as if +/// ValueType was loaded from Addr. +/// +/// Mask: Value to mask with the value loaded from AlignAddr to +/// include only the part that would've been loaded from Addr. +/// +/// Inv_Mask: The inverse of Mask. 
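
A worked instance may make these values concrete. The following is a minimal standalone sketch of the same arithmetic on plain integers (not the IR the helper emits), assuming a 4-byte word, an i16 at address 0x1002, and a little-endian target; the names simply mirror the fields documented above:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // Plain-integer model of the createMaskInstrs arithmetic (sketch only).
  int main() {
    const uint64_t Addr = 0x1002;               // address of the i16
    const unsigned ValueSize = 2, WordSize = 4; // bytes
    uint64_t AlignedAddr = Addr & ~(uint64_t)(WordSize - 1); // 0x1000
    uint64_t PtrLSB = Addr & (WordSize - 1);                 // 2
    uint64_t ShiftAmt = PtrLSB * 8;             // little-endian: 16 bits
    // A big-endian target would use (PtrLSB ^ (WordSize - ValueSize)) * 8.
    uint32_t Mask = ((1u << (ValueSize * 8)) - 1) << ShiftAmt; // 0xffff0000
    uint32_t Inv_Mask = ~Mask;                                 // 0x0000ffff
    assert(AlignedAddr == 0x1000 && ShiftAmt == 16);
    std::printf("Mask=%#010x Inv_Mask=%#010x\n", Mask, Inv_Mask);
  }
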
+
+static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
+ Type *ValueType, Value *Addr,
+ unsigned WordSize) {
+ PartwordMaskValues Ret;
+ BasicBlock *BB = I->getParent();
+ Function *F = BB->getParent();
+ Module *M = I->getModule();
+ LLVMContext &Ctx = F->getContext();
+ const DataLayout &DL = M->getDataLayout();
+
+ unsigned ValueSize = DL.getTypeStoreSize(ValueType);
+
+ assert(ValueSize < WordSize);
+
+ Ret.ValueType = ValueType;
+ Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
+
+ Type *WordPtrType =
+ Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
+
+ Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
+ Ret.AlignedAddr = Builder.CreateIntToPtr(
+ Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
+ "AlignedAddr");
+
+ Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
+ if (DL.isLittleEndian()) {
+ // turn bytes into bits
+ Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
+ } else {
+ // turn bytes into bits, and count from the other side.
+ Ret.ShiftAmt =
+ Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
+ }
+
+ Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
+ Ret.Mask = Builder.CreateShl(
+ ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
+ "Mask");
+ Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
+
+ return Ret;
+}
+
+/// Emit IR to implement a masked version of a given atomicrmw
+/// operation. (That is, only the bits under the Mask should be
+/// affected by the operation)
+static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
+ IRBuilder<> &Builder, Value *Loaded,
+ Value *Shifted_Inc, Value *Inc,
+ const PartwordMaskValues &PMV) {
+ switch (Op) {
+ case AtomicRMWInst::Xchg: {
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
+ return FinalVal;
+ }
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ // Or/Xor won't affect any other bits, so can just be done
+ // directly.
+ return performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Nand: {
+ // The other arithmetic ops need to be masked into place.
+ Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+ Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
+ return FinalVal;
+ }
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin: {
+ // Finally, comparison ops will operate on the full value, so
+ // truncate down to the original size, and expand out again after
+ // doing the operation.
+ Value *Loaded_Shiftdown = Builder.CreateTrunc(
+ Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
+ Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
+ Value *NewVal_Shiftup = Builder.CreateShl(
+ Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
+ Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+ Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
+ return FinalVal;
+ }
+ default:
+ llvm_unreachable("Unknown atomic op");
+ }
+}
+
+/// Expand a sub-word atomicrmw operation into an appropriate
+/// word-sized operation.
+/// +/// It will create an LL/SC or cmpxchg loop, as appropriate, the same +/// way as a typical atomicrmw expansion. The only difference here is +/// that the operation inside of the loop must operate only upon a +/// part of the value. +void AtomicExpand::expandPartwordAtomicRMW( + AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) { + + assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg); + + AtomicOrdering MemOpOrder = AI->getOrdering(); + + IRBuilder<> Builder(AI); + + PartwordMaskValues PMV = + createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), + TLI->getMinCmpXchgSizeInBits() / 8); + + Value *ValOperand_Shifted = + Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), + PMV.ShiftAmt, "ValOperand_Shifted"); + + auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) { + return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded, + ValOperand_Shifted, AI->getValOperand(), PMV); + }; + + // TODO: When we're ready to support LLSC conversions too, use + // insertRMWLLSCLoop here for ExpansionKind==LLSC. + Value *OldResult = + insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder, + PerformPartwordOp, createCmpXchgInstFun); + Value *FinalOldResult = Builder.CreateTrunc( + Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType); + AI->replaceAllUsesWith(FinalOldResult); + AI->eraseFromParent(); +} + +void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { + // The basic idea here is that we're expanding a cmpxchg of a + // smaller memory size up to a word-sized cmpxchg. To do this, we + // need to add a retry-loop for strong cmpxchg, so that + // modifications to other parts of the word don't cause a spurious + // failure. 
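
Before the IR-level shape below, the retry logic can also be read as ordinary C++ over a full-word std::atomic. A behavioural sketch (a model, not the emitted IR), assuming little-endian layout and the Mask/ShiftAmt values produced by the helper above:

  #include <atomic>
  #include <cstdint>

  // Word-sized CAS in which only the Mask-covered bits participate.
  // Retries while the failure was caused by the *other* bits of the
  // word changing underneath us; reports genuine failure otherwise.
  bool partwordCmpXchg(std::atomic<uint32_t> &Word, uint32_t Mask,
                       unsigned ShiftAmt, uint16_t Cmp, uint16_t New,
                       uint16_t &OldOut) {
    uint32_t Loaded_MaskOut = Word.load() & ~Mask;
    for (;;) {
      uint32_t FullCmp = Loaded_MaskOut | ((uint32_t)Cmp << ShiftAmt);
      uint32_t FullNew = Loaded_MaskOut | ((uint32_t)New << ShiftAmt);
      uint32_t Old = FullCmp;
      if (Word.compare_exchange_strong(Old, FullNew)) {
        OldOut = (uint16_t)(Old >> ShiftAmt);
        return true;  // our part matched and was swapped
      }
      if ((Old & ~Mask) == Loaded_MaskOut) {
        OldOut = (uint16_t)(Old >> ShiftAmt);
        return false; // our part really differed: genuine failure
      }
      Loaded_MaskOut = Old & ~Mask; // other bits changed: retry
    }
  }
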
+
+ // This generates code like the following:
+ // [[Setup mask values PMV.*]]
+ // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
+ // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
+ // %InitLoaded = load i32* %addr
+ // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
+ // br partword.cmpxchg.loop
+ // partword.cmpxchg.loop:
+ // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
+ // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
+ // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
+ // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
+ // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
+ // i32 %FullWord_NewVal success_ordering failure_ordering
+ // %OldVal = extractvalue { i32, i1 } %NewCI, 0
+ // %Success = extractvalue { i32, i1 } %NewCI, 1
+ // br i1 %Success, label %partword.cmpxchg.end,
+ // label %partword.cmpxchg.failure
+ // partword.cmpxchg.failure:
+ // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
+ // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
+ // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
+ // label %partword.cmpxchg.end
+ // partword.cmpxchg.end:
+ // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
+ // %FinalOldVal = trunc i32 %tmp1 to i8
+ // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
+ // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
+
+ Value *Addr = CI->getPointerOperand();
+ Value *Cmp = CI->getCompareOperand();
+ Value *NewVal = CI->getNewValOperand();
+
+ BasicBlock *BB = CI->getParent();
+ Function *F = BB->getParent();
+ IRBuilder<> Builder(CI);
+ LLVMContext &Ctx = Builder.getContext();
+
+ const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
+
+ BasicBlock *EndBB =
+ BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
+ auto FailureBB =
+ BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
+ auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
+
+ // The split call above "helpfully" added a branch at the end of BB
+ // (to the wrong place).
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+
+ PartwordMaskValues PMV = createMaskInstrs(
+ Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
+
+ // Shift the incoming values over, into the right location in the word.
+ Value *NewVal_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
+ Value *Cmp_Shifted =
+ Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
+
+ // Load the entire current word, and mask into place the expected and new
+ // values.
+ LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
+ InitLoaded->setVolatile(CI->isVolatile());
+ Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
+ Builder.CreateBr(LoopBB);
+
+ // partword.cmpxchg.loop:
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
+ Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
+
+ // Mask/Or the expected and new values into place in the loaded word.
+ Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted); + Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted); + AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg( + PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(), + CI->getFailureOrdering(), CI->getSynchScope()); + NewCI->setVolatile(CI->isVolatile()); + // When we're building a strong cmpxchg, we need a loop, so you + // might think we could use a weak cmpxchg inside. But, using strong + // allows the below comparison for ShouldContinue, and we're + // expecting the underlying cmpxchg to be a machine instruction, + // which is strong anyways. + NewCI->setWeak(CI->isWeak()); + + Value *OldVal = Builder.CreateExtractValue(NewCI, 0); + Value *Success = Builder.CreateExtractValue(NewCI, 1); + + if (CI->isWeak()) + Builder.CreateBr(EndBB); + else + Builder.CreateCondBr(Success, EndBB, FailureBB); + + // partword.cmpxchg.failure: + Builder.SetInsertPoint(FailureBB); + // Upon failure, verify that the masked-out part of the loaded value + // has been modified. If it didn't, abort the cmpxchg, since the + // masked-in part must've. + Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask); + Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut); + Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB); + + // Add the second value to the phi from above + Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB); + + // partword.cmpxchg.end: + Builder.SetInsertPoint(CI); + + Value *FinalOldVal = Builder.CreateTrunc( + Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType); + Value *Res = UndefValue::get(CI->getType()); + Res = Builder.CreateInsertValue(Res, FinalOldVal, 0); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); +} + +void AtomicExpand::expandAtomicOpToLLSC( + Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder, + function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { + IRBuilder<> Builder(I); + Value *Loaded = + insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp); + + I->replaceAllUsesWith(Loaded); + I->eraseFromParent(); +} + +Value *AtomicExpand::insertRMWLLSCLoop( + IRBuilder<> &Builder, Type *ResultTy, Value *Addr, + AtomicOrdering MemOpOrder, + function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) { + LLVMContext &Ctx = Builder.getContext(); + BasicBlock *BB = Builder.GetInsertBlock(); + Function *F = BB->getParent(); // Given: atomicrmw some_op iN* %addr, iN %incr ordering // // The standard expansion we produce is: // [...] - // fence? // atomicrmw.start: // %loaded = @load.linked(%addr) // %new = some_op iN %loaded, %incr @@ -413,17 +880,13 @@ bool AtomicExpand::expandAtomicOpToLLSC( // %try_again = icmp i32 ne %stored, 0 // br i1 %try_again, label %loop, label %atomicrmw.end // atomicrmw.end: - // fence? // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end"); + BasicBlock *ExitBB = + BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - // This grabs the DebugLoc from I. - IRBuilder<> Builder(I); - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. It's easiest to just remove - // the branch entirely. + // wrong place). 
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
Builder.CreateBr(LoopBB);
@@ -441,13 +904,53 @@ bool AtomicExpand::expandAtomicOpToLLSC(
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ return Loaded;
+}
- I->replaceAllUsesWith(Loaded);
- I->eraseFromParent();
+/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
+/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
+/// IR. As a migration step, we convert back to what used to be the standard
+/// way to represent a pointer cmpxchg so that we can update backends one by
+/// one.
+AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
+ auto *M = CI->getModule();
+ Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
+ M->getDataLayout());
- return true;
+ IRBuilder<> Builder(CI);
+
+ Value *Addr = CI->getPointerOperand();
+ Type *PT = PointerType::get(NewTy,
+ Addr->getType()->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+ Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
+ Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
+
+ auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
+ CI->getSuccessOrdering(),
+ CI->getFailureOrdering(),
+ CI->getSynchScope());
+ NewCI->setVolatile(CI->isVolatile());
+ NewCI->setWeak(CI->isWeak());
+ DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
+
+ Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
+ Value *Succ = Builder.CreateExtractValue(NewCI, 1);
+
+ OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
+
+ Value *Res = UndefValue::get(CI->getType());
+ Res = Builder.CreateInsertValue(Res, OldVal, 0);
+ Res = Builder.CreateInsertValue(Res, Succ, 1);
+
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return NewCI;
}
+
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
@@ -455,37 +958,71 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
BasicBlock *BB = CI->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();
- // If getInsertFencesForAtomic() returns true, then the target does not want
- // to deal with memory orders, and emitLeading/TrailingFence should take care
- // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
+ // If shouldInsertFencesForAtomic() returns true, then the target does not
+ // want to deal with memory orders, and emitLeading/TrailingFence should take
+ // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
// should preserve the ordering.
+ bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
AtomicOrdering MemOpOrder =
- TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
+ ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
+
+ // In implementations which use a barrier to achieve release semantics, we can
+ // delay emitting this barrier until we know a store is actually going to be
+ // attempted. The cost of this delay is that we need 2 copies of the block
+ // emitting the load-linked, affecting code size.
+ //
+ // Ideally, this logic would be unconditional except for the minsize check
+ // since in other cases the extra blocks naturally collapse down to the
+ // minimal loop. Unfortunately, this puts too much stress on later
+ // optimisations so we avoid emitting the extra logic in those cases too.
+ bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
+ SuccessOrder != AtomicOrdering::Monotonic &&
+ SuccessOrder != AtomicOrdering::Acquire &&
+ !F->optForMinSize();
+
+ // There's no overhead for sinking the release barrier in a weak cmpxchg, so
+ // do it even on minsize.
+ bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
// Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
//
// The full expansion we produce is:
// [...]
- // fence?
// cmpxchg.start:
- // %loaded = @load.linked(%addr)
- // %should_store = icmp eq %loaded, %desired
- // br i1 %should_store, label %cmpxchg.trystore,
+ // %unreleasedload = @load.linked(%addr)
+ // %should_store = icmp eq %unreleasedload, %desired
+ // br i1 %should_store, label %cmpxchg.fencedstore,
// label %cmpxchg.nostore
+ // cmpxchg.fencedstore:
+ // fence?
+ // br label %cmpxchg.trystore
// cmpxchg.trystore:
+ // %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore],
+ // [%releasedload, %cmpxchg.releasedload]
// %stored = @store_conditional(%new, %addr)
// %success = icmp eq i32 %stored, 0
- // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
+ // br i1 %success, label %cmpxchg.success,
+ // label %cmpxchg.releasedload/%cmpxchg.failure
+ // cmpxchg.releasedload:
+ // %releasedload = @load.linked(%addr)
+ // %should_store = icmp eq %releasedload, %desired
+ // br i1 %should_store, label %cmpxchg.trystore,
+ // label %cmpxchg.failure
// cmpxchg.success:
// fence?
// br label %cmpxchg.end
// cmpxchg.nostore:
+ // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
+ // [%releasedload,
+ // %cmpxchg.releasedload/%cmpxchg.trystore]
// @load_linked_fail_balance()?
// br label %cmpxchg.failure
// cmpxchg.failure:
// fence?
// br label %cmpxchg.end
// cmpxchg.end:
+ // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
+ // [%loaded.trystore, %cmpxchg.trystore]
// %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
// %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
// %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
@@ -494,8 +1031,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
- auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
- auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+ auto ReleasedLoadBB =
+ BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
+ auto TryStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
+ auto ReleasingStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
+ auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
// This grabs the DebugLoc from CI
IRBuilder<> Builder(CI);
@@ -505,32 +1047,55 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// the branch entirely.
std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, - /*IsLoad=*/true); - Builder.CreateBr(LoopBB); + if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) + TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, + /*IsLoad=*/true); + Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. - Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); - Value *ShouldStore = - Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); + Builder.SetInsertPoint(StartBB); + Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *ShouldStore = Builder.CreateICmpEQ( + UnreleasedLoad, CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); + Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB); + + Builder.SetInsertPoint(ReleasingStoreBB); + if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) + TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, + /*IsLoad=*/true); + Builder.CreateBr(TryStoreBB); Builder.SetInsertPoint(TryStoreBB); Value *StoreSuccess = TLI->emitStoreConditional( Builder, CI->getNewValOperand(), Addr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); + BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; Builder.CreateCondBr(StoreSuccess, SuccessBB, - CI->isWeak() ? FailureBB : LoopBB); - - // Make sure later instructions don't get reordered with a fence if necessary. + CI->isWeak() ? FailureBB : RetryBB); + + Builder.SetInsertPoint(ReleasedLoadBB); + Value *SecondLoad; + if (HasReleasedLoadBB) { + SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(), + "should_store"); + + // If the cmpxchg doesn't actually need any ordering when it fails, we can + // jump straight past that fence instruction (if it exists). + Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); + } else + Builder.CreateUnreachable(); + + // Make sure later instructions don't get reordered with a fence if + // necessary. Builder.SetInsertPoint(SuccessBB); - TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, - /*IsLoad=*/true); + if (ShouldInsertFencesForAtomic) + TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, + /*IsLoad=*/true); Builder.CreateBr(ExitBB); Builder.SetInsertPoint(NoStoreBB); @@ -541,20 +1106,43 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(FailureBB); Builder.SetInsertPoint(FailureBB); - TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, - /*IsLoad=*/true); + if (ShouldInsertFencesForAtomic) + TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, + /*IsLoad=*/true); Builder.CreateBr(ExitBB); // Finally, we have control-flow based knowledge of whether the cmpxchg // succeeded or not. We expose this to later passes by converting any - // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. - - // Setup the builder so we can create any PHIs we need. + // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate + // PHI. 
Builder.SetInsertPoint(ExitBB, ExitBB->begin()); PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); + // Setup the builder so we can create any PHIs we need. + Value *Loaded; + if (!HasReleasedLoadBB) + Loaded = UnreleasedLoad; + else { + Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin()); + PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); + TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB); + TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB); + + Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin()); + PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); + NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB); + NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB); + + Builder.SetInsertPoint(ExitBB, ++ExitBB->begin()); + PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); + ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB); + ExitLoaded->addIncoming(NoStoreLoaded, FailureBB); + + Loaded = ExitLoaded; + } + // Look for any users of the cmpxchg that are just comparing the loaded value // against the desired one, and replace them with the CFG-derived version. SmallVector<ExtractValueInst *, 2> PrunedInsts; @@ -620,16 +1208,14 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { return false; } -bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, - CreateCmpXchgInstFun CreateCmpXchg) { - assert(AI); - - AtomicOrdering MemOpOrder = - AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); +Value *AtomicExpand::insertRMWCmpXchgLoop( + IRBuilder<> &Builder, Type *ResultTy, Value *Addr, + AtomicOrdering MemOpOrder, + function_ref<Value *(IRBuilder<> &, Value *)> PerformOp, + CreateCmpXchgInstFun CreateCmpXchg) { + LLVMContext &Ctx = Builder.getContext(); + BasicBlock *BB = Builder.GetInsertBlock(); Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); // Given: atomicrmw some_op iN* %addr, iN %incr ordering // @@ -646,34 +1232,34 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, // br i1 %success, label %atomicrmw.end, label %loop // atomicrmw.end: // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end"); + BasicBlock *ExitBB = + BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we want a load. It's easiest to just remove // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - LoadInst *InitLoaded = Builder.CreateLoad(Addr); + LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr); // Atomics require at least natural alignment. - InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8); + InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. 
Builder.SetInsertPoint(LoopBB); - PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); + PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded"); Loaded->addIncoming(InitLoaded, BB); - Value *NewVal = - performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + Value *NewVal = PerformOp(Builder, Loaded); Value *NewLoaded = nullptr; Value *Success = nullptr; - CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder, + CreateCmpXchg(Builder, Addr, Loaded, NewVal, + MemOpOrder == AtomicOrdering::Unordered + ? AtomicOrdering::Monotonic + : MemOpOrder, Success, NewLoaded); assert(Success && NewLoaded); @@ -682,9 +1268,373 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, Builder.CreateCondBr(Success, ExitBB, LoopBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + return NewLoaded; +} - AI->replaceAllUsesWith(NewLoaded); +// Note: This function is exposed externally by AtomicExpandUtils.h +bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg) { + IRBuilder<> Builder(AI); + Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop( + Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(), + [&](IRBuilder<> &Builder, Value *Loaded) { + return performAtomicOp(AI->getOperation(), Builder, Loaded, + AI->getValOperand()); + }, + CreateCmpXchg); + + AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); + return true; +} +// In order to use one of the sized library calls such as +// __atomic_fetch_add_4, the alignment must be sufficient, the size +// must be one of the potentially-specialized sizes, and the value +// type must actually exist in C on the target (otherwise, the +// function wouldn't actually be defined.) +static bool canUseSizedAtomicCall(unsigned Size, unsigned Align, + const DataLayout &DL) { + // TODO: "LargestSize" is an approximation for "largest type that + // you can express in C". It seems to be the case that int128 is + // supported on all 64-bit platforms, otherwise only up to 64-bit + // integers are supported. If we get this wrong, then we'll try to + // call a sized libcall that doesn't actually exist. There should + // really be some more reliable way in LLVM of determining integer + // sizes which are valid in the target's C ABI... + unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 
16 : 8;
+ return Align >= Size &&
+ (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
+ Size <= LargestSize;
+}
+
+void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
+ RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
+ I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ (void)expanded;
+ assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
+}
+
+void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
+ RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
+ I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
+ (void)expanded;
+ assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
+}
+
+void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
+ static const RTLIB::Libcall Libcalls[6] = {
+ RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
+ RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
+ RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
+ unsigned Size = getAtomicOpSize(I);
+ unsigned Align = getAtomicOpAlign(I);
+
+ bool expanded = expandAtomicOpToLibcall(
+ I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
+ I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
+ Libcalls);
+ (void)expanded;
+ assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
+}
+
+static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
+ static const RTLIB::Libcall LibcallsXchg[6] = {
+ RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
+ RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
+ RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
+ static const RTLIB::Libcall LibcallsAdd[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
+ RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
+ RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
+ static const RTLIB::Libcall LibcallsSub[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
+ RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
+ RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
+ static const RTLIB::Libcall LibcallsAnd[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
+ RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
+ RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
+ static const RTLIB::Libcall LibcallsOr[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
+ RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
+ RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
+ static const RTLIB::Libcall LibcallsXor[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
+ RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
+ RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
+ static const RTLIB::Libcall LibcallsNand[6] = {
+ RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
+ RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
+
RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16}; + + switch (Op) { + case AtomicRMWInst::BAD_BINOP: + llvm_unreachable("Should not have BAD_BINOP."); + case AtomicRMWInst::Xchg: + return makeArrayRef(LibcallsXchg); + case AtomicRMWInst::Add: + return makeArrayRef(LibcallsAdd); + case AtomicRMWInst::Sub: + return makeArrayRef(LibcallsSub); + case AtomicRMWInst::And: + return makeArrayRef(LibcallsAnd); + case AtomicRMWInst::Or: + return makeArrayRef(LibcallsOr); + case AtomicRMWInst::Xor: + return makeArrayRef(LibcallsXor); + case AtomicRMWInst::Nand: + return makeArrayRef(LibcallsNand); + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + // No atomic libcalls are available for max/min/umax/umin. + return {}; + } + llvm_unreachable("Unexpected AtomicRMW operation."); +} + +void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) { + ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation()); + + unsigned Size = getAtomicOpSize(I); + unsigned Align = getAtomicOpAlign(I); + + bool Success = false; + if (!Libcalls.empty()) + Success = expandAtomicOpToLibcall( + I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr, + I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls); + + // The expansion failed: either there were no libcalls at all for + // the operation (min/max), or there were only size-specialized + // libcalls (add/sub/etc) and we needed a generic. So, expand to a + // CAS libcall, via a CAS loop, instead. + if (!Success) { + expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr, + Value *Loaded, Value *NewVal, + AtomicOrdering MemOpOrder, + Value *&Success, Value *&NewLoaded) { + // Create the CAS instruction normally... + AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Success = Builder.CreateExtractValue(Pair, 1, "success"); + NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); + + // ...and then expand the CAS into a libcall. + expandAtomicCASToLibcall(Pair); + }); + } +} + +// A helper routine for the above expandAtomic*ToLibcall functions. +// +// 'Libcalls' contains an array of enum values for the particular +// ATOMIC libcalls to be emitted. All of the other arguments besides +// 'I' are extracted from the Instruction subclass by the +// caller. Depending on the particular call, some will be null. +bool AtomicExpand::expandAtomicOpToLibcall( + Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand, + Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering, + AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) { + assert(Libcalls.size() == 6); + + LLVMContext &Ctx = I->getContext(); + Module *M = I->getModule(); + const DataLayout &DL = M->getDataLayout(); + IRBuilder<> Builder(I); + IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front()); + + bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL); + Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8); + + unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy); + + // TODO: the "order" argument type is "int", not int32. So + // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints. 
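
For reference, the integer passed for these ordering arguments follows the GCC atomic builtins ABI (__ATOMIC_RELAXED through __ATOMIC_SEQ_CST). A sketch of the mapping toCABI performs; the numeric values are stated from that ABI as an assumption, not quoted from this patch:

  #include <cstdint>

  enum class Order { NotAtomic, Unordered, Monotonic, Acquire,
                     Release, AcquireRelease, SequentiallyConsistent };

  // LLVM orderings -> the 'int' ordering argument of __atomic_* libcalls.
  int32_t toCABIOrder(Order O) {
    switch (O) {
    case Order::NotAtomic:
    case Order::Unordered:
    case Order::Monotonic:              return 0; // __ATOMIC_RELAXED
    case Order::Acquire:                return 2; // __ATOMIC_ACQUIRE
    case Order::Release:                return 3; // __ATOMIC_RELEASE
    case Order::AcquireRelease:         return 4; // __ATOMIC_ACQ_REL
    case Order::SequentiallyConsistent: return 5; // __ATOMIC_SEQ_CST
    }
    return 5;
  }
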
+
+ ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
+ assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
+ Constant *OrderingVal =
+ ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
+ Constant *Ordering2Val = nullptr;
+ if (CASExpected) {
+ assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
+ Ordering2Val =
+ ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
+ }
+ bool HasResult = I->getType() != Type::getVoidTy(Ctx);
+
+ RTLIB::Libcall RTLibType;
+ if (UseSizedLibcall) {
+ switch (Size) {
+ case 1: RTLibType = Libcalls[1]; break;
+ case 2: RTLibType = Libcalls[2]; break;
+ case 4: RTLibType = Libcalls[3]; break;
+ case 8: RTLibType = Libcalls[4]; break;
+ case 16: RTLibType = Libcalls[5]; break;
+ }
+ } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
+ RTLibType = Libcalls[0];
+ } else {
+ // Can't use sized function, and there's no generic for this
+ // operation, so give up.
+ return false;
+ }
+
+ // Build up the function call. There are two kinds. First, the sized
+ // variants. These calls are going to be one of the following (with
+ // N=1,2,4,8,16):
+ // iN __atomic_load_N(iN *ptr, int ordering)
+ // void __atomic_store_N(iN *ptr, iN val, int ordering)
+ // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
+ // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
+ // int success_order, int failure_order)
+ //
+ // Note that these functions can be used for non-integer atomic
+ // operations; the values just need to be bitcast to integers on the
+ // way in and out.
+ //
+ // And, then, the generic variants. They look like the following:
+ // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
+ // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
+ // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
+ // int ordering)
+ // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
+ // void *desired, int success_order,
+ // int failure_order)
+ //
+ // The different signatures are built up depending on the
+ // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
+ // variables.
+
+ AllocaInst *AllocaCASExpected = nullptr;
+ Value *AllocaCASExpected_i8 = nullptr;
+ AllocaInst *AllocaValue = nullptr;
+ Value *AllocaValue_i8 = nullptr;
+ AllocaInst *AllocaResult = nullptr;
+ Value *AllocaResult_i8 = nullptr;
+
+ Type *ResultTy;
+ SmallVector<Value *, 6> Args;
+ AttributeSet Attr;
+
+ // 'size' argument.
+ if (!UseSizedLibcall) {
+ // Note, getIntPtrType is assumed equivalent to size_t.
+ Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
+ }
+
+ // 'ptr' argument.
+ Value *PtrVal =
+ Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
+ Args.push_back(PtrVal);
+
+ // 'expected' argument, if present.
+ if (CASExpected) {
+ AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
+ AllocaCASExpected->setAlignment(AllocaAlignment);
+ AllocaCASExpected_i8 =
+ Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
+ Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
+ Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
+ Args.push_back(AllocaCASExpected_i8);
+ }
+
+ // 'val' argument ('desired' for cas), if present.
+ if (ValueOperand) { + if (UseSizedLibcall) { + Value *IntValue = + Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy); + Args.push_back(IntValue); + } else { + AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType()); + AllocaValue->setAlignment(AllocaAlignment); + AllocaValue_i8 = + Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx)); + Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64); + Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment); + Args.push_back(AllocaValue_i8); + } + } + + // 'ret' argument. + if (!CASExpected && HasResult && !UseSizedLibcall) { + AllocaResult = AllocaBuilder.CreateAlloca(I->getType()); + AllocaResult->setAlignment(AllocaAlignment); + AllocaResult_i8 = + Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx)); + Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64); + Args.push_back(AllocaResult_i8); + } + + // 'ordering' ('success_order' for cas) argument. + Args.push_back(OrderingVal); + + // 'failure_order' argument, if present. + if (Ordering2Val) + Args.push_back(Ordering2Val); + + // Now, the return type. + if (CASExpected) { + ResultTy = Type::getInt1Ty(Ctx); + Attr = Attr.addAttribute(Ctx, AttributeSet::ReturnIndex, Attribute::ZExt); + } else if (HasResult && UseSizedLibcall) + ResultTy = SizedIntTy; + else + ResultTy = Type::getVoidTy(Ctx); + + // Done with setting up arguments and return types, create the call: + SmallVector<Type *, 6> ArgTys; + for (Value *Arg : Args) + ArgTys.push_back(Arg->getType()); + FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false); + Constant *LibcallFn = + M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr); + CallInst *Call = Builder.CreateCall(LibcallFn, Args); + Call->setAttributes(Attr); + Value *Result = Call; + + // And then, extract the results... 
+ if (ValueOperand && !UseSizedLibcall) + Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64); + + if (CASExpected) { + // The final result from the CAS is {load of 'expected' alloca, bool result + // from call} + Type *FinalResultTy = I->getType(); + Value *V = UndefValue::get(FinalResultTy); + Value *ExpectedOut = + Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment); + Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64); + V = Builder.CreateInsertValue(V, ExpectedOut, 0); + V = Builder.CreateInsertValue(V, Result, 1); + I->replaceAllUsesWith(V); + } else if (HasResult) { + Value *V; + if (UseSizedLibcall) + V = Builder.CreateBitOrPointerCast(Result, I->getType()); + else { + V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment); + Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64); + } + I->replaceAllUsesWith(V); + } + I->eraseFromParent(); return true; } diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index df5cac5..5dacbf9 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -27,10 +27,11 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -90,7 +91,7 @@ INITIALIZE_PASS(BranchFolderPass, "branch-folder", "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { - if (skipOptnoneFunction(*MF.getFunction())) + if (skipFunction(*MF.getFunction())) return false; TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); @@ -98,8 +99,9 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { // HW that requires structurized CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && PassConfig->getEnableTailMerge(); - BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, - getAnalysis<MachineBlockFrequencyInfo>(), + BranchFolder::MBFIWrapper MBBFreqInfo( + getAnalysis<MachineBlockFrequencyInfo>()); + BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, getAnalysis<MachineBranchProbabilityInfo>()); return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(), @@ -107,7 +109,7 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { } BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, - const MachineBlockFrequencyInfo &FreqInfo, + MBFIWrapper &FreqInfo, const MachineBranchProbabilityInfo &ProbInfo) : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo), MBPI(ProbInfo) { @@ -135,6 +137,8 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { // Remove the block. MF->erase(MBB); FuncletMembership.erase(MBB); + if (MLI) + MLI->removeBlock(MBB); } /// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def @@ -167,7 +171,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { MachineBasicBlock::iterator FirstTerm = I; while (I != MBB->end()) { - if (!TII->isUnpredicatedTerminator(I)) + if (!TII->isUnpredicatedTerminator(*I)) return false; // See if it uses any of the implicitly defined registers. 
for (const MachineOperand &MO : I->operands()) {
@@ -191,25 +195,26 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
}
/// OptimizeFunction - Perform branch folding, tail merging and other
-/// CFG optimizations on the given function.
+/// CFG optimizations on the given function. Block placement changes the layout
+/// and may create new tail merging opportunities.
bool BranchFolder::OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
- MachineModuleInfo *mmi) {
+ MachineModuleInfo *mmi,
+ MachineLoopInfo *mli, bool AfterPlacement) {
if (!tii) return false;
TriedMerging.clear();
+ AfterBlockPlacement = AfterPlacement;
TII = tii;
TRI = tri;
MMI = mmi;
- RS = nullptr;
+ MLI = mli;
- // Use a RegScavenger to help update liveness when required.
MachineRegisterInfo &MRI = MF.getRegInfo();
- if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF))
- RS = new RegScavenger();
- else
+ UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF);
+ if (!UpdateLiveIns)
MRI.invalidateLiveness();
// Fix CFG. The later algorithms expect it to be right.
@@ -217,7 +222,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, true))
+ if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true))
MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
MadeChange |= OptimizeImpDefsBlock(&MBB);
}
@@ -228,7 +233,10 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
MadeChangeThisIteration = TailMergeBlocks(MF);
- MadeChangeThisIteration |= OptimizeBranches(MF);
+ // No need to clean up if tail merging does not change anything after the
+ // block placement.
+ if (!AfterBlockPlacement || MadeChangeThisIteration)
+ MadeChangeThisIteration |= OptimizeBranches(MF);
if (EnableHoistCommonCode)
MadeChangeThisIteration |= HoistCommonCode(MF);
MadeChange |= MadeChangeThisIteration;
@@ -237,10 +245,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// See if any jump tables have become dead as the code generator
// did its thing.
MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
- if (!JTI) {
- delete RS;
+ if (!JTI)
return MadeChange;
- }
// Walk the function to find jump tables that are live.
BitVector JTIsLive(JTI->getJumpTables().size());
@@ -262,7 +268,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
MadeChange = true;
}
- delete RS;
return MadeChange;
}
@@ -271,10 +276,10 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
//===----------------------------------------------------------------------===//
/// HashMachineInstr - Compute a hash value for MI and its operands.
-static unsigned HashMachineInstr(const MachineInstr *MI) {
- unsigned Hash = MI->getOpcode();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &Op = MI->getOperand(i);
+static unsigned HashMachineInstr(const MachineInstr &MI) {
+ unsigned Hash = MI.getOpcode();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &Op = MI.getOperand(i);
// Merge in bits from the operand if easy.
We can't use MachineOperand's // hash_code here because it's not deterministic and we sort by hash value @@ -311,12 +316,12 @@ static unsigned HashMachineInstr(const MachineInstr *MI) { } /// HashEndOfMBB - Hash the last instruction in the MBB. -static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) { - MachineBasicBlock::const_iterator I = MBB->getLastNonDebugInstr(); - if (I == MBB->end()) +static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) { + MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) return 0; - return HashMachineInstr(I); + return HashMachineInstr(*I); } /// ComputeCommonTailLength - Given two machine basic blocks, compute the number @@ -357,7 +362,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, --I2; } // I1, I2==first (untested) non-DBGs preceding known match - if (!I1->isIdenticalTo(I2) || + if (!I1->isIdenticalTo(*I2) || // FIXME: This check is dubious. It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. This is untenable because normal compiler @@ -394,15 +399,27 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, return TailLen; } -void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, - MachineBasicBlock *NewMBB) { - if (RS) { - RS->enterBasicBlock(CurMBB); - if (!CurMBB->empty()) - RS->forward(std::prev(CurMBB->end())); - for (unsigned int i = 1, e = TRI->getNumRegs(); i != e; i++) - if (RS->isRegUsed(i, false)) - NewMBB->addLiveIn(i); +void BranchFolder::computeLiveIns(MachineBasicBlock &MBB) { + if (!UpdateLiveIns) + return; + + LiveRegs.init(TRI); + LiveRegs.addLiveOutsNoPristines(MBB); + for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) + LiveRegs.stepBackward(MI); + + for (unsigned Reg : LiveRegs) { + // Skip the register if we are about to add one of its super registers. + bool ContainsSuperReg = false; + for (MCSuperRegIterator SReg(Reg, TRI); SReg.isValid(); ++SReg) { + if (LiveRegs.contains(*SReg)) { + ContainsSuperReg = true; + break; + } + } + if (ContainsSuperReg) + continue; + MBB.addLiveIn(Reg); } } @@ -410,12 +427,9 @@ void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, /// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { - MachineBasicBlock *CurMBB = OldInst->getParent(); - TII->ReplaceTailWithBranchTo(OldInst, NewDest); - // For targets that use the register scavenger, we must maintain LiveIns. - MaintainLiveIns(CurMBB, NewDest); + computeLiveIns(*NewDest); ++NumTailMerge; } @@ -445,16 +459,22 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Splice the code over. NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); + // NewMBB belongs to the same loop as CurMBB. + if (MLI) + if (MachineLoop *ML = MLI->getLoopFor(&CurMBB)) + ML->addBasicBlockToLoop(NewMBB, MLI->getBase()); + // NewMBB inherits CurMBB's block frequency. MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); - // For targets that use the register scavenger, we must maintain LiveIns. - MaintainLiveIns(&CurMBB, NewMBB); + computeLiveIns(*NewMBB); // Add the new block to the funclet. 
const auto &FuncletI = FuncletMembership.find(&CurMBB);
- if (FuncletI != FuncletMembership.end())
- FuncletMembership[NewMBB] = FuncletI->second;
+ if (FuncletI != FuncletMembership.end()) {
+ auto n = FuncletI->second;
+ FuncletMembership[NewMBB] = n;
+ }
return NewMBB;
}
@@ -488,8 +508,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
DebugLoc dl; // FIXME: this is nowhere
- if (I != MF->end() &&
- !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
+ if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->ReverseBranchCondition(Cond)) {
@@ -537,6 +556,18 @@ void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB,
MergedBBFreq[MBB] = F;
}
+raw_ostream &
+BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
+ const MachineBasicBlock *MBB) const {
+ return MBFI.printBlockFreq(OS, getBlockFreq(MBB));
+}
+
+raw_ostream &
+BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
+ const BlockFrequency Freq) const {
+ return MBFI.printBlockFreq(OS, Freq);
+}
+
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
@@ -745,11 +776,8 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
}
static void
-removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
- MachineBasicBlock &MBBCommon) {
- // Remove MMOs from memory operations in the common block
- // when they do not match the ones from the block being tail-merged.
- // This ensures later passes conservatively compute dependencies.
+mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
+ MachineBasicBlock &MBBCommon) {
MachineBasicBlock *MBB = MBBIStartPos->getParent();
// Note CommonTailLen does not necessarily match the size of
// the common BB nor all its instructions because of debug
@@ -777,24 +805,33 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
assert(MBBICommon != MBBIECommon &&
"Reached BB end within common tail length!");
- assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!");
+ assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");
+ // Merge MMOs from memory operations in the common block.
if (MBBICommon->mayLoad() || MBBICommon->mayStore())
MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
+ // Drop undef flags if they aren't present in all merged instructions.
+ for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBICommon->getOperand(I);
+ if (MO.isReg() && MO.isUndef()) {
+ const MachineOperand &OtherMO = MBBI->getOperand(I);
+ if (!OtherMO.isUndef())
+ MO.setIsUndef(false);
+ }
+ }
++MBBI;
++MBBICommon;
}
}
-// See if any of the blocks in MergePotentials (which all have a common single
-// successor, or all have no successor) can be tail-merged. If there is a
-// successor, any blocks in MergePotentials that are not tail-merged and
-// are not immediately before Succ must have an unconditional branch to
-// Succ added (but the predecessor/successor lists need no adjustment).
-// The lone predecessor of Succ that falls through into Succ,
+// See if any of the blocks in MergePotentials (which all have SuccBB as a
+// successor, or all have no successor if it is null) can be tail-merged.
+// If there is a successor, any blocks in MergePotentials that are not +// tail-merged and are not immediately before Succ must have an unconditional +// branch to Succ added (but the predecessor/successor lists need no +// adjustment). The lone predecessor of Succ that falls through into Succ, // if any, is given in PredBB. - bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB) { bool MadeChange = false; @@ -888,7 +925,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); - // Recompute commont tail MBB's edge weights and block frequency. + // Recompute common tail MBB's edge weights and block frequency. setCommonTailEdgeWeights(*MBB); // MBB is common tail. Adjust all other BB's to jump to this one. @@ -900,8 +937,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); - // Remove MMOs from memory operations as needed. - removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); + // Merge operations (MMOs, undef flags) + mergeOperations(SameTails[i].getTailStartPos(), *MBB); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. @@ -920,23 +957,27 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (!EnableTailMerge) return MadeChange; // First find blocks with no successors. - MergePotentials.clear(); - for (MachineBasicBlock &MBB : MF) { - if (MergePotentials.size() == TailMergeThreshold) - break; - if (!TriedMerging.count(&MBB) && MBB.succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(&MBB), &MBB)); - } + // Block placement does not create new tail merging opportunities for these + // blocks. + if (!AfterBlockPlacement) { + MergePotentials.clear(); + for (MachineBasicBlock &MBB : MF) { + if (MergePotentials.size() == TailMergeThreshold) + break; + if (!TriedMerging.count(&MBB) && MBB.succ_empty()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB)); + } - // If this is a large problem, avoid visiting the same basic blocks - // multiple times. - if (MergePotentials.size() == TailMergeThreshold) - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) - TriedMerging.insert(MergePotentials[i].getBlock()); + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); - // See if we can do any tail merging on those. - if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(nullptr, nullptr); + // See if we can do any tail merging on those. + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(nullptr, nullptr); + } // Look at blocks (IBB) with multiple predecessors (PBB). 
// We change each predecessor to a canonical form, by @@ -964,6 +1005,24 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { MachineBasicBlock *IBB = &*I; MachineBasicBlock *PredBB = &*std::prev(I); MergePotentials.clear(); + MachineLoop *ML; + + // Bail if merging after placement and IBB is the loop header because + // -- If merging predecessors that belong to the same loop as IBB, the + // common tail of merged predecessors may become the loop top if block + // placement is called again and the predecessors may branch to this common + // tail and require more branches. This can be relaxed if + // MachineBlockPlacement::findBestLoopTop is more flexible. + // -- If merging predecessors that do not belong to the same loop as IBB, the + // loop info of IBB's loop and the other loops may be affected. Calling the + // block placement again may make big changes to the layout and eliminate + // the reason to do tail merging here. + if (AfterBlockPlacement && MLI) { + ML = MLI->getLoopFor(IBB); + if (ML && IBB == ML->getHeader()) + continue; + } + for (MachineBasicBlock *PBB : I->predecessors()) { if (MergePotentials.size() == TailMergeThreshold) break; @@ -983,9 +1042,16 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (PBB->hasEHPadSuccessor()) continue; + // After block placement, only consider predecessors that belong to the + // same loop as IBB. The reason is the same as above when skipping the + // loop header. + if (AfterBlockPlacement && MLI) + if (ML != MLI->getLoopFor(PBB)) + continue; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; - if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { + if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) { // Failing case: IBB is the target of a cbr, and we cannot reverse the // branch. SmallVector<MachineOperand, 4> NewCond(Cond); @@ -1033,7 +1099,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { NewCond, dl); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), PBB)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(*PBB), PBB)); } } @@ -1211,7 +1277,7 @@ ReoptimizeBlock: // where a BB jumps to more than one landing pad. // TODO: Is it ever worth rewriting predecessors which don't already // jump to a landing pad, and so can safely jump to the fallthrough? - } else { + } else if (MBB->isSuccessor(&*FallThrough)) { // Rewrite all predecessors of the old block to go to the fallthrough // instead. while (!MBB->pred_empty()) { @@ -1234,7 +1300,7 @@ ReoptimizeBlock: MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; bool PriorUnAnalyzable = - TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); + TII->analyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); if (!PriorUnAnalyzable) { // If the CFG for the prior block has extra edges, remove them. MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB, @@ -1275,11 +1341,11 @@ ReoptimizeBlock: // DBG_VALUE at the beginning of MBB.
while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end() && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) { - if (!MBBIter->isIdenticalTo(PrevBBIter)) + if (!MBBIter->isIdenticalTo(*PrevBBIter)) break; - MachineInstr *DuplicateDbg = MBBIter; + MachineInstr &DuplicateDbg = *MBBIter; ++MBBIter; -- PrevBBIter; - DuplicateDbg->eraseFromParent(); + DuplicateDbg.eraseFromParent(); } } PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); @@ -1371,7 +1437,8 @@ ReoptimizeBlock: // Analyze the branch in the current block. MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; SmallVector<MachineOperand, 4> CurCond; - bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); + bool CurUnAnalyzable = + TII->analyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); if (!CurUnAnalyzable) { // If the CFG for the prior block has extra edges, remove them. MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty()); @@ -1455,8 +1522,8 @@ ReoptimizeBlock: // change this to an unconditional branch (and fix the CFG). MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr; SmallVector<MachineOperand, 4> NewCurCond; - bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, - NewCurFBB, NewCurCond, true); + bool NewCurUnAnalyzable = TII->analyzeBranch( + *PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); @@ -1502,9 +1569,9 @@ ReoptimizeBlock: MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; if (PredBB != MBB && !PredBB->canFallThrough() && - !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) - && (!CurFallsThru || !CurTBB || !CurFBB) - && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { + !TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) && + (!CurFallsThru || !CurTBB || !CurFBB) && + (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { // If the current block doesn't fall through, just move it. // If the current block can fall through and does not end with a // conditional branch, we need to append an unconditional jump to @@ -1560,7 +1627,7 @@ ReoptimizeBlock: // Now check to see if the current block is sitting between PrevBB and // a block to which it could fall through. if (FallThrough != MF.end() && - !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && + !TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && PrevBB.isSuccessor(&*FallThrough)) { MBB->moveAfter(&MF.back()); MadeChange = true; @@ -1623,7 +1690,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, SmallSet<unsigned,4> &Uses, SmallSet<unsigned,4> &Defs) { MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); - if (!TII->isUnpredicatedTerminator(Loc)) + if (!TII->isUnpredicatedTerminator(*Loc)) return MBB->end(); for (const MachineOperand &MO : Loc->operands()) { @@ -1685,7 +1752,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // Also avoid moving code above predicated instruction since it's hard to // reason about register liveness with predicated instruction. 
bool DontMoveAcrossStore = true; - if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(PI)) + if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(*PI)) return MBB->end(); @@ -1719,7 +1786,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty()) + if (TII->analyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty()) return false; if (!FBB) FBB = findFalseBlock(MBB, TBB); @@ -1762,10 +1829,10 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (FIB == FIE) break; } - if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead)) + if (!TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead)) break; - if (TII->isPredicated(TIB)) + if (TII->isPredicated(*TIB)) // Hard to reason about register liveness with predicated instruction. break; @@ -1844,7 +1911,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; unsigned Reg = MO.getReg(); - if (!Reg) + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg)) continue; LocalDefs.push_back(Reg); addRegAndItsAliases(Reg, TRI, LocalDefsSet); diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h index d759d53..36a5a2e 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -11,6 +11,7 @@ #define LLVM_LIB_CODEGEN_BRANCHFOLDING_H #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/Support/BlockFrequency.h" #include <vector> @@ -20,20 +21,23 @@ namespace llvm { class MachineBranchProbabilityInfo; class MachineFunction; class MachineModuleInfo; - class RegScavenger; + class MachineLoopInfo; class TargetInstrInfo; class TargetRegisterInfo; class LLVM_LIBRARY_VISIBILITY BranchFolder { public: + class MBFIWrapper; + explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, - const MachineBlockFrequencyInfo &MBFI, + MBFIWrapper &MBFI, const MachineBranchProbabilityInfo &MBPI); - bool OptimizeFunction(MachineFunction &MF, - const TargetInstrInfo *tii, - const TargetRegisterInfo *tri, - MachineModuleInfo *mmi); + bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, MachineModuleInfo *mmi, + MachineLoopInfo *mli = nullptr, + bool AfterPlacement = false); + private: class MergePotentialsElt { unsigned Hash; @@ -91,13 +95,17 @@ namespace llvm { }; std::vector<SameTailElt> SameTails; + bool AfterBlockPlacement; bool EnableTailMerge; bool EnableHoistCommonCode; + bool UpdateLiveIns; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; - RegScavenger *RS; + MachineLoopInfo *MLI; + LivePhysRegs LiveRegs; + public: /// \brief This class keeps track of branch frequencies of newly created /// blocks and tail-merged blocks. 
class MBFIWrapper { @@ -105,21 +113,25 @@ namespace llvm { MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {} BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); + raw_ostream &printBlockFreq(raw_ostream &OS, + const MachineBasicBlock *MBB) const; + raw_ostream &printBlockFreq(raw_ostream &OS, + const BlockFrequency Freq) const; private: const MachineBlockFrequencyInfo &MBFI; DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq; }; - MBFIWrapper MBBFreqInfo; + private: + MBFIWrapper &MBBFreqInfo; const MachineBranchProbabilityInfo &MBPI; bool TailMergeBlocks(MachineFunction &MF); bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB); - void MaintainLiveIns(MachineBasicBlock *CurMBB, - MachineBasicBlock *NewMBB); + void computeLiveIns(MachineBasicBlock &MBB); void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, diff --git a/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp new file mode 100644 index 0000000..ff7c99d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/BuiltinGCs.cpp @@ -0,0 +1,139 @@ +//===-- BuiltinGCs.cpp - Boilerplate for our built in GC types --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the boilerplate required to define our various built in +// gc lowering strategies. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCStrategy.h" + +using namespace llvm; + +namespace { + +/// An example GC which attempts to be compatible with Erlang/OTP garbage +/// collector. +/// +/// The frametable emitter is in ErlangGCPrinter.cpp. +class ErlangGC : public GCStrategy { +public: + ErlangGC() { + InitRoots = false; + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; + CustomRoots = false; + } +}; + +/// An example GC which attempts to be compatible with Objective Caml 3.10.0 +/// +/// The frametable emitter is in OcamlGCPrinter.cpp. +class OcamlGC : public GCStrategy { +public: + OcamlGC() { + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; + } +}; + +/// A GC strategy for uncooperative targets. This implements lowering for the +/// llvm.gc* intrinsics for targets that do not natively support them (which +/// includes the C backend). Note that the code generated is not quite as +/// efficient as algorithms which generate stack maps to identify roots. +/// +/// In order to support this particular transformation, all stack roots are +/// co-allocated in the stack. This allows a fully target-independent stack map +/// while introducing only minor runtime overhead. +class ShadowStackGC : public GCStrategy { +public: + ShadowStackGC() { + InitRoots = true; + CustomRoots = true; + } +}; + +/// A GCStrategy which serves as an example for the usage of a statepoint based +/// lowering strategy.
This GCStrategy is intended to be suitable as a default +/// implementation usable with any collector which can consume the standard +/// stackmap format generated by statepoints, uses the default addrspace to +/// distinguish between gc managed and non-gc managed pointers, and has +/// reasonable relocation semantics. +class StatepointGC : public GCStrategy { +public: + StatepointGC() { + UseStatepoints = true; + // These options are all gc.root specific; we specify them so that the + // gc.root lowering code doesn't run. + InitRoots = false; + NeededSafePoints = 0; + UsesMetadata = false; + CustomRoots = false; + } + Optional<bool> isGCManagedPointer(const Type *Ty) const override { + // Method is only valid on pointer typed values. + const PointerType *PT = cast<PointerType>(Ty); + // For the sake of this example GC, we arbitrarily pick addrspace(1) as our + // GC managed heap. We know that a pointer into this heap needs to be + // updated and that no other pointer does. Note that addrspace(1) is used + // only as an example, it has no special meaning, and is not reserved for + // GC usage. + return (1 == PT->getAddressSpace()); + } +}; + +/// A GCStrategy for the CoreCLR Runtime. The strategy is similar to +/// Statepoint-example GC, but differs from it in certain aspects, such as: +/// 1) Base-pointers need not be explicitly tracked and reported for +/// interior pointers +/// 2) Uses a different format for encoding stack-maps +/// 3) Location of Safe-point polls: polls are only needed before loop-back +/// edges and before tail-calls (not needed at function-entry) +/// +/// The above differences in behavior are to be implemented in upcoming +/// checkins. +class CoreCLRGC : public GCStrategy { +public: + CoreCLRGC() { + UseStatepoints = true; + // These options are all gc.root specific; we specify them so that the + // gc.root lowering code doesn't run. + InitRoots = false; + NeededSafePoints = 0; + UsesMetadata = false; + CustomRoots = false; + } + Optional<bool> isGCManagedPointer(const Type *Ty) const override { + // Method is only valid on pointer typed values. + const PointerType *PT = cast<PointerType>(Ty); + // We pick addrspace(1) as our GC managed heap. + return (1 == PT->getAddressSpace()); + } +}; +} + +// Register all the above so that they can be found at runtime. Note that +// these static initializers are important since the registration list is +// constructed from their storage. +static GCRegistry::Add<ErlangGC> A("erlang", + "erlang-compatible garbage collector"); +static GCRegistry::Add<OcamlGC> B("ocaml", "ocaml 3.10-compatible GC"); +static GCRegistry::Add<ShadowStackGC> + C("shadow-stack", "Very portable GC for uncooperative code generators"); +static GCRegistry::Add<StatepointGC> D("statepoint-example", + "an example strategy for statepoint"); +static GCRegistry::Add<CoreCLRGC> E("coreclr", "CoreCLR-compatible GC"); + +// Provide hooks to ensure the containing library is fully loaded.
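For context on the registry above: the strings passed to GCRegistry::Add are the same names that IR functions carry in their gc attribute, and they are the lookup key at code generation time. A minimal sketch, assuming an already-constructed llvm::Function, of how a frontend opts into one of these strategies:

#include "llvm/IR/Function.h"

// Sketch: tag a function so the statepoint-example strategy registered above
// is used for it. The name string must match the GCRegistry::Add entry.
void useStatepointExampleGC(llvm::Function &F) {
  F.setGC("statepoint-example");
}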
+void llvm::linkErlangGC() {} +void llvm::linkOcamlGC() {} +void llvm::linkShadowStackGC() {} +void llvm::linkStatepointExampleGC() {} +void llvm::linkCoreCLRGC() {} diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index 26aa46f..dc2d38a 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -121,7 +121,7 @@ static bool isRematerializable(const LiveInterval &LI, } } - if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis())) + if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis())) return false; } return true; @@ -170,8 +170,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // Calculate instr weight. bool reads, writes; std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); - weight = LiveIntervals::getSpillWeight( - writes, reads, &MBFI, mi); + weight = LiveIntervals::getSpillWeight(writes, reads, &MBFI, *mi); // Give extra weight to what looks like a loop induction variable update. if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) @@ -192,11 +191,15 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // FIXME: we probably shouldn't use floats at all. volatile float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && mri.isAllocatable(hint)) - bestPhys = hweight, hintPhys = hint; + if (hweight > bestPhys && mri.isAllocatable(hint)) { + bestPhys = hweight; + hintPhys = hint; + } } else { - if (hweight > bestVirt) - bestVirt = hweight, hintVirt = hint; + if (hweight > bestVirt) { + bestVirt = hweight; + hintVirt = hint; + } } } diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index 23c0d54..7d67bcf 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -51,9 +51,9 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, Size = MinSize; if (MinAlign > (int)Align) Align = MinAlign; - MF.getFrameInfo()->ensureMaxAlignment(Align); + ensureMaxAlignment(Align); MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align); - Size = unsigned(RoundUpToAlignment(Size, MinAlign)); + Size = unsigned(alignTo(Size, MinAlign)); unsigned Offset = AllocateStack(Size, Align); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } @@ -236,6 +236,7 @@ void CCState::analyzeMustTailForwardedRegisters( // variadic functions, so we need to assume we're not variadic so that we get // all the registers that might be used in a non-variadic call. 
SaveAndRestore<bool> SavedVarArg(IsVarArg, false); + SaveAndRestore<bool> SavedMustTail(AnalyzingMustTailForwardedRegs, true); for (MVT RegVT : RegParmTypes) { SmallVector<MCPhysReg, 8> RemainingRegs; @@ -248,3 +249,39 @@ void CCState::analyzeMustTailForwardedRegisters( } } } + +bool CCState::resultsCompatible(CallingConv::ID CalleeCC, + CallingConv::ID CallerCC, MachineFunction &MF, + LLVMContext &C, + const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn CalleeFn, CCAssignFn CallerFn) { + if (CalleeCC == CallerCC) + return true; + SmallVector<CCValAssign, 4> RVLocs1; + CCState CCInfo1(CalleeCC, false, MF, RVLocs1, C); + CCInfo1.AnalyzeCallResult(Ins, CalleeFn); + + SmallVector<CCValAssign, 4> RVLocs2; + CCState CCInfo2(CallerCC, false, MF, RVLocs2, C); + CCInfo2.AnalyzeCallResult(Ins, CallerFn); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned I = 0, E = RVLocs1.size(); I != E; ++I) { + const CCValAssign &Loc1 = RVLocs1[I]; + const CCValAssign &Loc2 = RVLocs2[I]; + if (Loc1.getLocInfo() != Loc2.getLocInfo()) + return false; + bool RegLoc1 = Loc1.isRegLoc(); + if (RegLoc1 != Loc2.isRegLoc()) + return false; + if (RegLoc1) { + if (Loc1.getLocReg() != Loc2.getLocReg()) + return false; + } else { + if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset()) + return false; + } + } + return true; +} diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index dc13b5b..6679819 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -24,6 +24,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBranchFolderPassPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); + initializeDetectDeadLanesPass(Registry); initializeDwarfEHPreparePass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandISelPseudosPass(Registry); @@ -33,6 +34,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); initializeIfConverterPass(Registry); + initializeInterleavedAccessPass(Registry); initializeLiveDebugVariablesPass(Registry); initializeLiveIntervalsPass(Registry); initializeLiveStacksPass(Registry); @@ -55,26 +57,32 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineSchedulerPass(Registry); initializeMachineSinkingPass(Registry); initializeMachineVerifierPassPass(Registry); + initializeXRayInstrumentationPass(Registry); + initializePatchableFunctionPass(Registry); initializeOptimizePHIsPass(Registry); initializePEIPass(Registry); initializePHIEliminationPass(Registry); initializePeepholeOptimizerPass(Registry); initializePostMachineSchedulerPass(Registry); + initializePostRAHazardRecognizerPass(Registry); initializePostRASchedulerPass(Registry); + initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); initializeRegisterCoalescerPass(Registry); + initializeRenameIndependentSubregsPass(Registry); initializeShrinkWrapPass(Registry); initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); initializeStackMapLivenessPass(Registry); initializeLiveDebugValuesPass(Registry); + initializeSafeStackPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeTailDuplicatePassPass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); 
initializeUnpackMachineBundlesPass(Registry); - initializeUnreachableBlockElimPass(Registry); + initializeUnreachableBlockElimLegacyPassPass(Registry); initializeUnreachableMachineBlockElimPass(Registry); initializeVirtRegMapPass(Registry); initializeVirtRegRewriterPass(Registry); diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index c8007a5..ede4041 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -18,9 +18,11 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -38,6 +40,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -111,6 +114,10 @@ static cl::opt<bool> StressExtLdPromotion( cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare")); +static cl::opt<bool> DisablePreheaderProtect( + "disable-preheader-prot", cl::Hidden, cl::init(false), + cl::desc("Disable protection against removing loop preheaders")); + namespace { typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; @@ -122,6 +129,7 @@ class TypePromotionTransaction; const TargetLowering *TLI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; + const LoopInfo *LI; /// As we scan instructions optimizing them, this is the next instruction /// to optimize. Transforms that can invalidate this should update it. @@ -158,9 +166,10 @@ class TypePromotionTransaction; const char *getPassName() const override { return "CodeGen Prepare"; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<DominatorTreeWrapperPass>(); + // FIXME: When we can selectively preserve passes, preserve the domtree. AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); } private: @@ -203,7 +212,7 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { } bool CodeGenPrepare::runOnFunction(Function &F) { - if (skipOptnoneFunction(F)) + if (skipFunction(F)) return false; DL = &F.getParent()->getDataLayout(); @@ -218,6 +227,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLI = TM->getSubtargetImpl(F)->getTargetLowering(); TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); OptSize = F.optForSize(); /// This optimization identifies DIV instructions that can be @@ -359,6 +369,15 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) { /// edges in ways that are non-optimal for isel. Start by eliminating these /// blocks so we can split them the way we want them. 
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { + SmallPtrSet<BasicBlock *, 16> Preheaders; + SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end()); + while (!LoopList.empty()) { + Loop *L = LoopList.pop_back_val(); + LoopList.insert(LoopList.end(), L->begin(), L->end()); + if (BasicBlock *Preheader = L->getLoopPreheader()) + Preheaders.insert(Preheader); + } + bool MadeChange = false; // Note that this intentionally skips the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { @@ -391,6 +410,14 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { if (!canMergeBlocks(BB, DestBB)) continue; + // Do not delete loop preheaders if doing so would create a critical edge. + // Loop preheaders can be good locations to spill registers. If the + // preheader is deleted and we create a critical edge, registers may be + // spilled in the loop body instead. + if (!DisablePreheaderProtect && Preheaders.count(BB) && + !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor())) + continue; + eliminateMostlyEmptyBlock(BB); MadeChange = true; } @@ -612,7 +639,8 @@ simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, continue; // Create a Builder and replace the target callsite with a gep - assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator"); + assert(RelocatedBase->getNextNode() && + "Should always have one since it's not a terminator"); // Insert after RelocatedBase IRBuilder<> Builder(RelocatedBase->getNextNode()); @@ -730,6 +758,11 @@ static bool SinkCast(CastInst *CI) { // Preincrement use iterator so we don't invalidate it. ++UI; + // The first insertion point of a block containing an EH pad is after the + // pad. If the pad is the user, we cannot sink the cast past the pad. + if (User->isEHPad()) + continue; + // If the block selected to receive the cast is an EH pad that does not // allow non-PHI instructions before the terminator, we can't sink the // cast. @@ -854,10 +887,14 @@ static bool CombineUAddWithOverflow(CmpInst *CI) { /// lose; some adjustment may be wanted there. /// /// Return true if any changes are made. -static bool SinkCmpExpression(CmpInst *CI) { +static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) { BasicBlock *DefBB = CI->getParent(); - /// Only insert a cmp in each block once. + // Avoid sinking soft-FP comparisons, since this can move them into a loop. + if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI)) + return false; + + // Only insert a cmp in each block once. DenseMap<BasicBlock*, CmpInst*> InsertedCmps; bool MadeChange = false; @@ -905,8 +942,8 @@ static bool SinkCmpExpression(CmpInst *CI) { return MadeChange; } -static bool OptimizeCmpExpression(CmpInst *CI) { - if (SinkCmpExpression(CI)) +static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) { + if (SinkCmpExpression(CI, TLI)) return true; if (CombineUAddWithOverflow(CI)) @@ -1138,7 +1175,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, // %13 = icmp eq i1 %12, true // br i1 %13, label %cond.load4, label %else5 // -static void ScalarizeMaskedLoad(CallInst *CI) { +static void scalarizeMaskedLoad(CallInst *CI) { Value *Ptr = CI->getArgOperand(0); Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); @@ -1284,7 +1321,7 @@ static void ScalarizeMaskedLoad(CallInst *CI) { // store i32 %8, i32* %9 // br label %else2 // . . . 
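For intuition about the scalarize* helpers being renamed here: the IR they emit (sketched in the comments above) implements per-lane reference semantics that look like the following plain C++. This is a hedged model only; the real pass expands each lane into branchy IR basic blocks rather than a loop:

#include <cstddef>

// Reference semantics of llvm.masked.load / llvm.masked.store for one vector.
template <typename T, std::size_t N>
void maskedLoadRef(T (&Out)[N], const T *Ptr, const bool (&Mask)[N],
                   const T (&PassThru)[N]) {
  for (std::size_t I = 0; I != N; ++I)
    Out[I] = Mask[I] ? Ptr[I] : PassThru[I]; // masked-off lanes keep PassThru
}

template <typename T, std::size_t N>
void maskedStoreRef(const T (&Src)[N], T *Ptr, const bool (&Mask)[N]) {
  for (std::size_t I = 0; I != N; ++I)
    if (Mask[I])
      Ptr[I] = Src[I]; // masked-off lanes leave memory untouched
}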
-static void ScalarizeMaskedStore(CallInst *CI) { +static void scalarizeMaskedStore(CallInst *CI) { Value *Src = CI->getArgOperand(0); Value *Ptr = CI->getArgOperand(1); Value *Alignment = CI->getArgOperand(2); @@ -1403,7 +1440,7 @@ static void ScalarizeMaskedStore(CallInst *CI) { // . . . // % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src // ret <16 x i32> %Result -static void ScalarizeMaskedGather(CallInst *CI) { +static void scalarizeMaskedGather(CallInst *CI) { Value *Ptrs = CI->getArgOperand(0); Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); @@ -1538,7 +1575,7 @@ static void ScalarizeMaskedGather(CallInst *CI) { // store i32 % Elt1, i32* % Ptr1, align 4 // br label %else2 // . . . -static void ScalarizeMaskedScatter(CallInst *CI) { +static void scalarizeMaskedScatter(CallInst *CI) { Value *Src = CI->getArgOperand(0); Value *Ptrs = CI->getArgOperand(1); Value *Alignment = CI->getArgOperand(2); @@ -1653,7 +1690,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, // Only handle legal scalar cases. Anything else requires too much work. Type *Ty = CountZeros->getType(); unsigned SizeInBits = Ty->getPrimitiveSizeInBits(); - if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSize()) + if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) return false; // The intrinsic will be sunk behind a compare against zero and branch. @@ -1743,8 +1780,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { // forbidden. GlobalVariable *GV; if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() && - GV->getAlignment() < PrefAlign && - DL->getTypeAllocSize(GV->getType()->getElementType()) >= + GV->getPointerAlignment(*DL) < PrefAlign && + DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2) GV->setAlignment(PrefAlign); } @@ -1759,27 +1796,47 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { } } + // If we have a cold call site, try to sink addressing computation into the + // cold block. This interacts with our handling for loads and stores to + // ensure that we can fold all uses of a potential addressing computation + // into their uses. TODO: generalize this to work over profiling data + if (!OptSize && CI->hasFnAttr(Attribute::Cold)) + for (auto &Arg : CI->arg_operands()) { + if (!Arg->getType()->isPointerTy()) + continue; + unsigned AS = Arg->getType()->getPointerAddressSpace(); + return optimizeMemoryInst(CI, Arg, Arg->getType(), AS); + } + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { // Lower all uses of llvm.objectsize.* - bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); + uint64_t Size; Type *ReturnTy = CI->getType(); - Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); - + Constant *RetVal = nullptr; + ConstantInt *Op1 = cast<ConstantInt>(II->getArgOperand(1)); + ObjSizeMode Mode = Op1->isZero() ? ObjSizeMode::Max : ObjSizeMode::Min; + if (getObjectSize(II->getArgOperand(0), + Size, *DL, TLInfo, false, Mode)) { + RetVal = ConstantInt::get(ReturnTy, Size); + } else { + RetVal = ConstantInt::get(ReturnTy, + Mode == ObjSizeMode::Min ? 0 : -1ULL); + } // Substituting this can cause recursive simplifications, which can // invalidate our iterator. Use a WeakVH to hold onto it in case this // happens. 
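The llvm.objectsize lowering earlier in this hunk now asks getObjectSize for a concrete size and only falls back to the pessimistic constants when the size is unknown. A condensed model of the fold decision, plain C++ for illustration only:

#include <cstdint>

// Models the fold above: a known size wins; otherwise min mode answers 0 and
// max mode answers -1, matching the intrinsic's documented fallbacks.
enum class ObjSizeMode { Min, Max };

std::uint64_t foldObjectSize(bool SizeKnown, std::uint64_t Size,
                             ObjSizeMode Mode) {
  if (SizeKnown)
    return Size;
  return Mode == ObjSizeMode::Min ? 0 : ~std::uint64_t(0);
}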
- WeakVH IterHandle(&*CurInstIterator); + Value *CurValue = &*CurInstIterator; + WeakVH IterHandle(CurValue); - replaceAndRecursivelySimplify(CI, RetVal, - TLInfo, nullptr); + replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); // If the iterator instruction was recursively deleted, start over at the // start of the block. - if (IterHandle != CurInstIterator.getNodePtrUnchecked()) { + if (IterHandle != CurValue) { CurInstIterator = BB->begin(); SunkAddrs.clear(); } @@ -1788,7 +1845,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { case Intrinsic::masked_load: { // Scalarize unsupported vector masked load if (!TTI->isLegalMaskedLoad(CI->getType())) { - ScalarizeMaskedLoad(CI); + scalarizeMaskedLoad(CI); ModifiedDT = true; return true; } @@ -1796,7 +1853,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { } case Intrinsic::masked_store: { if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { - ScalarizeMaskedStore(CI); + scalarizeMaskedStore(CI); ModifiedDT = true; return true; } @@ -1804,7 +1861,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { } case Intrinsic::masked_gather: { if (!TTI->isLegalMaskedGather(CI->getType())) { - ScalarizeMaskedGather(CI); + scalarizeMaskedGather(CI); ModifiedDT = true; return true; } @@ -1812,7 +1869,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { } case Intrinsic::masked_scatter: { if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { - ScalarizeMaskedScatter(CI); + scalarizeMaskedScatter(CI); ModifiedDT = true; return true; } @@ -2076,7 +2133,7 @@ void ExtAddrMode::print(raw_ostream &OS) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void ExtAddrMode::dump() const { +LLVM_DUMP_METHOD void ExtAddrMode::dump() const { print(dbgs()); dbgs() << '\n'; } @@ -3442,6 +3499,8 @@ static bool FindAllMemoryUses( if (!MightBeFoldableInst(I)) return true; + const bool OptSize = I->getFunction()->optForSize(); + // Loop over all the uses, recursively processing them. for (Use &U : I->uses()) { Instruction *UserI = cast<Instruction>(U.getUser()); @@ -3459,6 +3518,11 @@ static bool FindAllMemoryUses( } if (CallInst *CI = dyn_cast<CallInst>(UserI)) { + // If this is a cold call, we can sink the addressing calculation into + // the cold path. See optimizeCallInst + if (!OptSize && CI->hasFnAttr(Attribute::Cold)) + continue; + InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); if (!IA) return true; @@ -3550,10 +3614,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, if (!BaseReg && !ScaledReg) return true; - // If all uses of this instruction are ultimately load/store/inlineasm's, - // check to see if their addressing modes will include this instruction. If - // so, we can fold it into all uses, so it doesn't matter if it has multiple - // uses. + // If all uses of this instruction can have the address mode sunk into them, + // we can remove the addressing mode and effectively trade one live register + // for another (at worst.) In this context, folding an addressing mode into + // the use is just a particularly nice way of sinking it. 
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; SmallPtrSet<Instruction*, 16> ConsideredInsts; if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM)) @@ -3561,8 +3625,13 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // Now that we know that all uses of this instruction are part of a chain of // computation involving only operations that could theoretically be folded - // into a memory use, loop over each of these uses and see if they could - // *actually* fold the instruction. + // into a memory use, loop over each of these memory operation uses and see + // if they could *actually* fold the instruction. The assumption is that + // addressing modes are cheap and that duplicating the computation involved + // many times is worthwhile, even on a fastpath. For sinking candidates + // (i.e. cold call sites), this serves as a way to prevent excessive code + // growth since most architectures have some reasonable small and fast way to + // compute an effective address. (i.e LEA on x86) SmallVector<Instruction*, 32> MatchedAddrModeInsts; for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { Instruction *User = MemoryUses[i].first; @@ -3616,6 +3685,11 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { return false; } +/// Sink addressing mode computation immediate before MemoryInst if doing so +/// can be done without increasing register pressure. The need for the +/// register pressure constraint means this can end up being an all or nothing +/// decision for all uses of the same addressing computation. +/// /// Load and Store Instructions often have addressing modes that can do /// significant amounts of computation. As such, instruction selection will try /// to get the load or store to do as much computation as possible for the @@ -3623,7 +3697,13 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { /// such, we sink as much legal addressing mode work into the block as possible. /// /// This method is used to optimize both load/store and inline asms with memory -/// operands. +/// operands. It's also used to sink addressing computations feeding into cold +/// call sites into their (cold) basic block. +/// +/// The motivation for handling sinking into cold blocks is that doing so can +/// both enable other address mode sinking (by satisfying the register pressure +/// constraint above), and reduce register pressure globally (by removing the +/// addressing mode computation from the fast path entirely.). bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, unsigned AddrSpace) { Value *Repl = Addr; @@ -3662,7 +3742,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, continue; } - // For non-PHIs, determine the addressing mode being computed. + // For non-PHIs, determine the addressing mode being computed. Note that + // the result may differ depending on what other uses our candidate + // addressing instructions might have. SmallVector<Instruction*, 16> NewAddrModeInsts; ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM, @@ -3945,12 +4027,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (Repl->use_empty()) { // This can cause recursive deletion, which can invalidate our iterator. // Use a WeakVH to hold onto it in case this happens. 
- WeakVH IterHandle(&*CurInstIterator); + Value *CurValue = &*CurInstIterator; + WeakVH IterHandle(CurValue); BasicBlock *BB = CurInstIterator->getParent(); RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); - if (IterHandle != CurInstIterator.getNodePtrUnchecked()) { + if (IterHandle != CurValue) { // If the iterator instruction was recursively deleted, start over at the // start of the block. CurInstIterator = BB->begin(); @@ -4461,11 +4544,27 @@ static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { /// Returns true if a SelectInst should be turned into an explicit branch. static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, + const TargetLowering *TLI, SelectInst *SI) { + // If even a predictable select is cheap, then a branch can't be cheaper. + if (!TLI->isPredictableSelectExpensive()) + return false; + // FIXME: This should use the same heuristics as IfConversion to determine - // whether a select is better represented as a branch. This requires that - // branch probability metadata is preserved for the select, which is not the - // case currently. + // whether a select is better represented as a branch. + + // If metadata tells us that the select condition is obviously predictable, + // then we want to replace the select with a branch. + uint64_t TrueWeight, FalseWeight; + if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + uint64_t Max = std::max(TrueWeight, FalseWeight); + uint64_t Sum = TrueWeight + FalseWeight; + if (Sum != 0) { + auto Probability = BranchProbability::getBranchProbability(Max, Sum); + if (Probability > TLI->getPredictableBranchThreshold()) + return true; + } + } CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); @@ -4475,17 +4574,6 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, if (!Cmp || !Cmp->hasOneUse()) return false; - Value *CmpOp0 = Cmp->getOperand(0); - Value *CmpOp1 = Cmp->getOperand(1); - - // Emit "cmov on compare with a memory operand" as a branch to avoid stalls - // on a load from memory. But if the load is used more than once, do not - // change the select to a branch because the load is probably needed - // regardless of whether the branch is taken or not. - if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || - (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())) - return true; - // If either operand of the select is expensive and only needed on one side // of the select, we should form a branch. if (sinkSelectOperand(TTI, SI->getTrueValue()) || @@ -4502,7 +4590,8 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); // Can we convert the 'select' to CF ? - if (DisableSelectToBranch || OptSize || !TLI || VectorCond) + if (DisableSelectToBranch || OptSize || !TLI || VectorCond || + SI->getMetadata(LLVMContext::MD_unpredictable)) return false; TargetLowering::SelectSupportKind SelectKind; @@ -4513,14 +4602,9 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { else SelectKind = TargetLowering::ScalarValSelect; - // Do we have efficient codegen support for this kind of 'selects' ? - if (TLI->isSelectSupported(SelectKind)) { - // We have efficient codegen support for the select instruction. - // Check if it is profitable to keep this 'select'. 
- if (!TLI->isPredictableSelectExpensive() || - !isFormingBranchFromSelectProfitable(TTI, SI)) - return false; - } + if (TLI->isSelectSupported(SelectKind) && + !isFormingBranchFromSelectProfitable(TTI, TLI, SI)) + return false; ModifiedDT = true; @@ -5145,7 +5229,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { if (CmpInst *CI = dyn_cast<CmpInst>(I)) if (!TLI || !TLI->hasMultipleConditionRegisters()) - return OptimizeCmpExpression(CI); + return OptimizeCmpExpression(CI, TLI); if (LoadInst *LI = dyn_cast<LoadInst>(I)) { stripInvariantGroupMetadata(*LI); @@ -5221,7 +5305,7 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL, return false; SmallVector<Instruction*, 4> Insts; - if (!recognizeBitReverseOrBSwapIdiom(&I, false, true, Insts)) + if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts)) return false; Instruction *LastInst = Insts.back(); I.replaceAllUsesWith(LastInst); @@ -5249,12 +5333,13 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) { for (auto &I : reverse(BB)) { if (makeBitReverse(I, *DL, *TLI)) { MadeBitReverse = MadeChange = true; + ModifiedDT = true; break; } } } MadeChange |= dupRetToEnableTailCallOpts(&BB); - + return MadeChange; } @@ -5310,43 +5395,38 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) { if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) return false; bool MadeChange = false; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { - BasicBlock *BB = &*I++; - + for (BasicBlock &BB : F) { // Does this BB end with the following? // %andVal = and %val, #single-bit-set // %icmpVal = icmp %andResult, 0 // br i1 %cmpVal label %dest1, label %dest2" - BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator()); + BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator()); if (!Brcc || !Brcc->isConditional()) continue; ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0)); - if (!Cmp || Cmp->getParent() != BB) + if (!Cmp || Cmp->getParent() != &BB) continue; ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1)); if (!Zero || !Zero->isZero()) continue; Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0)); - if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB) + if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB) continue; ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1)); if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) continue; - DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump()); + DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump()); // Push the "and; icmp" for any users that are conditional branches. // Since there can only be one branch use per BB, we don't need to keep // track of which BBs we insert into. - for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end(); - UI != E; ) { - Use &TheUse = *UI; + for (Use &TheUse : Cmp->uses()) { // Find brcc use. - BranchInst *BrccUser = dyn_cast<BranchInst>(*UI); - ++UI; + BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse); if (!BrccUser || !BrccUser->isConditional()) continue; BasicBlock *UserBB = BrccUser->getParent(); - if (UserBB == BB) continue; + if (UserBB == &BB) continue; DEBUG(dbgs() << "found Brcc use\n"); // Sink the "and; icmp" to use. @@ -5365,29 +5445,6 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) { return MadeChange; } -/// \brief Retrieve the probabilities of a conditional branch. Returns true on -/// success, or returns false if no or invalid metadata was found. 
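The local extractBranchMetadata helper deleted here is superseded by Instruction::extractProfMetadata, which the select and branch-splitting changes above already call. A short sketch of the replacement pattern, assuming a conditional BranchInst obtained elsewhere; the 4/5 threshold is an arbitrary example value:

#include <algorithm>
#include <cstdint>
#include "llvm/IR/Instructions.h"
#include "llvm/Support/BranchProbability.h"

// Sketch: read branch_weights through the generic accessor instead of parsing
// the !prof metadata node by hand.
bool isHeavilyBiased(llvm::BranchInst *Br) {
  std::uint64_t TrueW, FalseW;
  if (!Br->extractProfMetadata(TrueW, FalseW) || TrueW + FalseW == 0)
    return false;
  auto P = llvm::BranchProbability::getBranchProbability(
      std::max(TrueW, FalseW), TrueW + FalseW);
  return P > llvm::BranchProbability(4, 5); // biased more than 80% one way
}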
-static bool extractBranchMetadata(BranchInst *BI, - uint64_t &ProbTrue, uint64_t &ProbFalse) { - assert(BI->isConditional() && - "Looking for probabilities on unconditional branch?"); - auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof); - if (!ProfileData || ProfileData->getNumOperands() != 3) - return false; - - const auto *CITrue = - mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1)); - const auto *CIFalse = - mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2)); - if (!CITrue || !CIFalse) - return false; - - ProbTrue = CITrue->getValue().getZExtValue(); - ProbFalse = CIFalse->getValue().getZExtValue(); - - return true; -} - /// \brief Scale down both weights to fit into uint32_t. static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; @@ -5456,11 +5513,9 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); // Create a new BB. - auto *InsertBefore = std::next(Function::iterator(BB)) - .getNodePtrUnchecked(); - auto TmpBB = BasicBlock::Create(BB.getContext(), - BB.getName() + ".cond.split", - BB.getParent(), InsertBefore); + auto TmpBB = + BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split", + BB.getParent(), BB.getNextNode()); // Update original basic block by using the first condition directly by the // branch instruction and removing the no longer needed and/or instruction. @@ -5535,7 +5590,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { // Another choice is to assume TrueProb for BB1 equals to TrueProb for // TmpBB, but the math is more complicated. uint64_t TrueWeight, FalseWeight; - if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) { + if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) { uint64_t NewTrueWeight = TrueWeight; uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight; scaleWeights(NewTrueWeight, NewFalseWeight); @@ -5568,7 +5623,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { // assumes that // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. uint64_t TrueWeight, FalseWeight; - if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) { + if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) { uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight; uint64_t NewFalseWeight = FalseWeight; scaleWeights(NewTrueWeight, NewFalseWeight); diff --git a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp deleted file mode 100644 index ff7c0d5..0000000 --- a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//===-- CoreCLRGC.cpp - CoreCLR Runtime GC Strategy -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a GCStrategy for the CoreCLR Runtime. -// The strategy is similar to Statepoint-example GC, but differs from it in -// certain aspects, such as: -// 1) Base-pointers need not be explicitly tracked and reported for -// interior pointers -// 2) Uses a different format for encoding stack-maps -// 3) Location of Safe-point polls: polls are only needed before loop-back edges -// and before tail-calls (not needed at function-entry) -// -// The above differences in behavior are to be implemented in upcoming checkins. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/GCStrategy.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Value.h" - -using namespace llvm; - -namespace { -class CoreCLRGC : public GCStrategy { -public: - CoreCLRGC() { - UseStatepoints = true; - // These options are all gc.root specific, we specify them so that the - // gc.root lowering code doesn't run. - InitRoots = false; - NeededSafePoints = 0; - UsesMetadata = false; - CustomRoots = false; - } - Optional<bool> isGCManagedPointer(const Type *Ty) const override { - // Method is only valid on pointer typed values. - const PointerType *PT = cast<PointerType>(Ty); - // We pick addrspace(1) as our GC managed heap. - return (1 == PT->getAddressSpace()); - } -}; -} - -static GCRegistry::Add<CoreCLRGC> X("coreclr", "CoreCLR-compatible GC"); - -namespace llvm { -void linkCoreCLRGC() {} -} diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index c924ba3..a0189a1 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -87,7 +87,7 @@ void CriticalAntiDepBreaker::FinishBlock() { KeepRegs.reset(); } -void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, +void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count, unsigned InsertPosIndex) { // Kill instructions can define registers but are really nops, and there might // be a real definition earlier that needs to be paired with uses dominated by @@ -96,7 +96,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, // FIXME: It may be possible to remove the isKill() restriction once PR18663 // has been properly fixed. There can be value in processing kills as seen in // the AggressiveAntiDepBreaker class. - if (MI->isDebugValue() || MI->isKill()) + if (MI.isDebugValue() || MI.isKill()) return; assert(Count < InsertPosIndex && "Instruction index out of expected range!"); @@ -146,7 +146,7 @@ static const SDep *CriticalPathStep(const SUnit *SU) { return Next; } -void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { +void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) { // It's not safe to change register allocation for source operands of // instructions that have special allocation requirements. Also assume all // registers used in a call must not be changed (ABI). @@ -163,21 +163,20 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // instruction which may not be executed. The second R6 def may or may not // re-define R6 so it's not safe to change it since the last R6 use cannot be // changed. - bool Special = MI->isCall() || - MI->hasExtraSrcRegAllocReq() || - TII->isPredicated(MI); + bool Special = + MI.isCall() || MI.hasExtraSrcRegAllocReq() || TII->isPredicated(MI); // Scan the register operands for this instruction and update // Classes and RegRefs. 
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; const TargetRegisterClass *NewRC = nullptr; - if (i < MI->getDesc().getNumOperands()) - NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + if (i < MI.getDesc().getNumOperands()) + NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -212,7 +211,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // of a register? In the above 'xor' example, the uses of %eax are undef, so // earlier instructions could still replace %eax even though the 'xor' // itself can't be changed. - if (MI->isRegTiedToUseOperand(i) && + if (MI.isRegTiedToUseOperand(i) && Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) { for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) { @@ -234,18 +233,17 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { } } -void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, - unsigned Count) { +void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { // Update liveness. // Proceeding upwards, registers that are defed but not used in this // instruction are now dead. - assert(!MI->isKill() && "Attempting to scan a kill instruction"); + assert(!MI.isKill() && "Attempting to scan a kill instruction"); if (!TII->isPredicated(MI)) { // Predicated defs are modeled as read + write, i.e. similar to two // address updates. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (MO.isRegMask()) for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) @@ -262,11 +260,13 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, if (Reg == 0) continue; if (!MO.isDef()) continue; - // If we've already marked this reg as unchangeable, carry on. - if (KeepRegs.test(Reg)) continue; - // Ignore two-addr defs. - if (MI->isRegTiedToUseOperand(i)) continue; + if (MI.isRegTiedToUseOperand(i)) + continue; + + // If we've already marked this reg as unchangeable, don't remove + // it or any of its subregs from KeepRegs. + bool Keep = KeepRegs.test(Reg); // For the reg itself and all subregs: update the def to current; // reset the kill state, any restrictions, and references. @@ -274,25 +274,26 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, unsigned SubregReg = *SRI; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; - KeepRegs.reset(SubregReg); Classes[SubregReg] = nullptr; RegRefs.erase(SubregReg); + if (!Keep) + KeepRegs.reset(SubregReg); } // Conservatively mark super-registers as unusable. 
for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1); } } - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isUse()) continue; const TargetRegisterClass *NewRC = nullptr; - if (i < MI->getDesc().getNumOperands()) - NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + if (i < MI.getDesc().getNumOperands()) + NewRC = TII->getRegClass(MI.getDesc(), i, TRI, MF); // For now, only allow the register to be changed if its register // class is consistent across all uses. @@ -510,7 +511,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, unsigned Broken = 0; unsigned Count = InsertPosIndex - 1; for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { - MachineInstr *MI = --I; + MachineInstr &MI = *--I; // Kill instructions can define registers but are really nops, and there // might be a real definition earlier that needs to be paired with uses // dominated by this kill. @@ -518,7 +519,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // FIXME: It may be possible to remove the isKill() restriction once PR18663 // has been properly fixed. There can be value in processing kills as seen // in the AggressiveAntiDepBreaker class. - if (MI->isDebugValue() || MI->isKill()) + if (MI.isDebugValue() || MI.isKill()) continue; // Check if this instruction has a dependence on the critical path that @@ -535,7 +536,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // edge per instruction. Note that we'd have to be able to break all of // the anti-dependencies in an instruction in order to be effective. unsigned AntiDepReg = 0; - if (MI == CriticalPathMI) { + if (&MI == CriticalPathMI) { if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) { const SUnit *NextSU = Edge->getSUnit(); @@ -585,7 +586,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) + if (MI.isCall() || MI.hasExtraDefRegAllocReq() || TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. AntiDepReg = 0; @@ -594,8 +595,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // is invalid. If the instruction defines other registers, // save a list of them so that we don't pick a new register // that overlaps any of them. 
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; @@ -647,7 +648,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, for (DbgValueVector::iterator DVI = DbgValues.begin(), DVE = DbgValues.end(); DVI != DVE; ++DVI) if (DVI->second == Q->second->getParent()) - UpdateDbgValue(DVI->first, AntiDepReg, NewReg); + UpdateDbgValue(*DVI->first, AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h index 10b8739..678779f 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -19,17 +19,15 @@ #include "AntiDepBreaker.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include <map> namespace llvm { class RegisterClassInfo; class TargetInstrInfo; class TargetRegisterInfo; +class MachineFunction; class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; @@ -84,15 +82,15 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker { /// Update liveness information to account for the current /// instruction, which will not be scheduled. - void Observe(MachineInstr *MI, unsigned Count, + void Observe(MachineInstr &MI, unsigned Count, unsigned InsertPosIndex) override; /// Finish anti-dep breaking for a basic block. void FinishBlock() override; private: - void PrescanInstruction(MachineInstr *MI); - void ScanInstruction(MachineInstr *MI, unsigned Count); + void PrescanInstruction(MachineInstr &MI); + void ScanInstruction(MachineInstr &MI, unsigned Count); bool isNewRegClobberedByRefs(RegRefIter RegRefBegin, RegRefIter RegRefEnd, unsigned NewReg); diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index af6b6a3..2386af9 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -23,12 +23,15 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "packets" + #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" + using namespace llvm; // -------------------------------------------------------------------- @@ -44,8 +47,8 @@ namespace { /// DFAPacketizerEmitter.cpp. 
DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) { DFAInput InsnInput = 0; - assert ((InsnClass.size() <= DFA_MAX_RESTERMS) && - "Exceeded maximum number of DFA terms"); + assert((InsnClass.size() <= DFA_MAX_RESTERMS) && + "Exceeded maximum number of DFA terms"); for (auto U : InsnClass) InsnInput = addDFAFuncUnits(InsnInput, U); return InsnInput; } @@ -59,15 +62,16 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), DFAStateEntryTable(SET) { // Make sure DFA types are large enough for the number of terms & resources. - assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)) - && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); - assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)) - && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); + static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= + (8 * sizeof(DFAInput)), + "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); + static_assert( + (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), + "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); } -// -// ReadTable - Read the DFA transition table and update CachedTable. +// Read the DFA transition table and update CachedTable. // // Format of the transition tables: // DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid @@ -80,8 +84,7 @@ void DFAPacketizer::ReadTable(unsigned int state) { unsigned NextStateInTable = DFAStateEntryTable[state+1]; // Early exit in case CachedTable already contains this // state's transitions. - if (CachedTable.count(UnsignPair(state, - DFAStateInputTable[ThisState][0]))) + if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0]))) return; for (unsigned i = ThisState; i < NextStateInTable; i++) @@ -89,38 +92,41 @@ void DFAPacketizer::ReadTable(unsigned int state) { DFAStateInputTable[i][1]; } -// -// getInsnInput - Return the DFAInput for an instruction class. -// + +// Return the DFAInput for an instruction class. DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. DFAInput InsnInput = 0; unsigned i = 0; + (void)i; for (const InstrStage *IS = InstrItins->beginStage(InsnClass), - *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS, ++i) { + *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) { InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); - assert ((i < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); + assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); } return InsnInput; } -// getInsnInput - Return the DFAInput for an instruction class input vector. + +// Return the DFAInput for an instruction class input vector. DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) { return getDFAInsnInput(InsnClass); } -// canReserveResources - Check if the resources occupied by a MCInstrDesc -// are available in the current state. + +// Check if the resources occupied by a MCInstrDesc are available in the +// current state.
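The table format described above is worth a concrete illustration. The following self-contained sketch (the tables, state numbers, and inputs are made up for illustration, not data from any real itinerary) mirrors the lookup that ReadTable() performs: EntryTable[i] marks where state i's rows begin in InputTable, and an instruction fits the current packet when a <state, input> transition exists.

#include <cstdio>
#include <map>
#include <utility>

// Hypothetical 3-state DFA. Rows of InputTable are <Input, NextState> pairs;
// EntryTable[i] is the first row belonging to state i, so state i's rows end
// where state i+1's begin (a final sentinel entry closes the last state).
static const unsigned InputTable[][2] = {
    {1, 1}, {2, 2}, // state 0: input 1 -> state 1, input 2 -> state 2
    {2, 2},         // state 1: input 2 -> state 2
};                  // state 2: no transitions
static const unsigned EntryTable[] = {0, 2, 3, 3};

int main() {
  std::map<std::pair<unsigned, unsigned>, unsigned> CachedTable;
  unsigned State = 0;
  // Cache all transitions of State, as ReadTable() does.
  for (unsigned Row = EntryTable[State]; Row < EntryTable[State + 1]; ++Row)
    CachedTable[{State, InputTable[Row][0]}] = InputTable[Row][1];
  // An instruction class whose DFA input is 2 can be reserved in state 0
  // because the transition <0, 2> exists.
  std::printf("can reserve: %d\n", CachedTable.count({0u, 2u}) != 0 ? 1 : 0);
  return 0;
}

The canReserveResources() overloads that follow perform exactly this membership test for an instruction's schedule class, and reserveResources() additionally advances CurrentState along the matching transition.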
bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); ReadTable(CurrentState); - return (CachedTable.count(StateTrans) != 0); + return CachedTable.count(StateTrans) != 0; } -// reserveResources - Reserve the resources occupied by a MCInstrDesc and -// change the current state to reflect that change. + +// Reserve the resources occupied by a MCInstrDesc and change the current +// state to reflect that change. void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); @@ -131,34 +137,46 @@ void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { } -// canReserveResources - Check if the resources occupied by a machine -// instruction are available in the current state. -bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) { - const llvm::MCInstrDesc &MID = MI->getDesc(); +// Check if the resources occupied by a machine instruction are available +// in the current state. +bool DFAPacketizer::canReserveResources(llvm::MachineInstr &MI) { + const llvm::MCInstrDesc &MID = MI.getDesc(); return canReserveResources(&MID); } -// reserveResources - Reserve the resources occupied by a machine -// instruction and change the current state to reflect that change. -void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) { - const llvm::MCInstrDesc &MID = MI->getDesc(); + +// Reserve the resources occupied by a machine instruction and change the +// current state to reflect that change. +void DFAPacketizer::reserveResources(llvm::MachineInstr &MI) { + const llvm::MCInstrDesc &MID = MI.getDesc(); reserveResources(&MID); } + namespace llvm { -// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides -// Schedule method to build the dependence graph. +// This class extends ScheduleDAGInstrs and overrides the schedule method +// to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { private: AliasAnalysis *AA; + /// Ordered list of DAG postprocessing steps. + std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA); - // Schedule - Actual scheduling work. + // Actual scheduling work. void schedule() override; + + /// DefaultVLIWScheduler takes ownership of the Mutation object. + void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { + Mutations.push_back(std::move(Mutation)); + } +protected: + void postprocessDAG(); }; } + DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA) @@ -166,42 +184,51 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, CanHandleTerminators = true; } + +/// Apply each ScheduleDAGMutation step in order. +void DefaultVLIWScheduler::postprocessDAG() { + for (auto &M : Mutations) + M->apply(this); +} + + void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. 
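// Once the graph is built, postprocessDAG() applies the registered ScheduleDAGMutations in order, letting a target refine dependences before packetization.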
buildSchedGraph(AA); + postprocessDAG(); } -// VLIWPacketizerList Ctor -VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF, - MachineLoopInfo &MLI, AliasAnalysis *AA) - : MF(MF), AA(AA) { - TII = MF.getSubtarget().getInstrInfo(); + +VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, + MachineLoopInfo &mli, AliasAnalysis *aa) + : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); - VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, AA); + VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); } -// VLIWPacketizerList Dtor + VLIWPacketizerList::~VLIWPacketizerList() { if (VLIWScheduler) delete VLIWScheduler; - if (ResourceTracker) delete ResourceTracker; } -// endPacket - End the current packet, bundle packet instructions and reset -// DFA state. + +// End the current packet, bundle packet instructions and reset DFA state. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, - MachineInstr *MI) { + MachineBasicBlock::iterator MI) { if (CurrentPacketMIs.size() > 1) { - MachineInstr *MIFirst = CurrentPacketMIs.front(); - finalizeBundle(*MBB, MIFirst->getIterator(), MI->getIterator()); + MachineInstr &MIFirst = *CurrentPacketMIs.front(); + finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator()); } CurrentPacketMIs.clear(); ResourceTracker->clearResources(); + DEBUG(dbgs() << "End packet\n"); } -// PacketizeMIs - Bundle machine instructions into packets. + +// Bundle machine instructions into packets. void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator BeginItr, MachineBasicBlock::iterator EndItr) { @@ -211,64 +238,88 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, std::distance(BeginItr, EndItr)); VLIWScheduler->schedule(); + DEBUG({ + dbgs() << "Scheduling DAG of the packetize region\n"; + for (SUnit &SU : VLIWScheduler->SUnits) + SU.dumpAll(VLIWScheduler); + }); + // Generate MI -> SU map. MIToSUnit.clear(); - for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) { - SUnit *SU = &VLIWScheduler->SUnits[i]; - MIToSUnit[SU->getInstr()] = SU; - } + for (SUnit &SU : VLIWScheduler->SUnits) + MIToSUnit[SU.getInstr()] = &SU; // The main packetizer loop. for (; BeginItr != EndItr; ++BeginItr) { - MachineInstr *MI = BeginItr; - - this->initPacketizerState(); + MachineInstr &MI = *BeginItr; + initPacketizerState(); // End the current packet if needed. - if (this->isSoloInstruction(MI)) { + if (isSoloInstruction(MI)) { endPacket(MBB, MI); continue; } // Ignore pseudo instructions. - if (this->ignorePseudoInstruction(MI, MBB)) + if (ignorePseudoInstruction(MI, MBB)) continue; - SUnit *SUI = MIToSUnit[MI]; + SUnit *SUI = MIToSUnit[&MI]; assert(SUI && "Missing SUnit Info!"); // Ask DFA if machine resource is available for MI. + DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI); + bool ResourceAvail = ResourceTracker->canReserveResources(MI); + DEBUG({ + if (ResourceAvail) + dbgs() << " Resources are available for adding MI to packet\n"; + else + dbgs() << " Resources NOT available\n"; + }); if (ResourceAvail && shouldAddToPacket(MI)) { // Dependency check for MI with instructions in CurrentPacketMIs. 
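// MI joins the packet only if every pairing with an instruction already in the packet is legal, or the offending dependence can be pruned; otherwise the packet is closed first.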
- for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), - VE = CurrentPacketMIs.end(); VI != VE; ++VI) { - MachineInstr *MJ = *VI; + for (auto MJ : CurrentPacketMIs) { SUnit *SUJ = MIToSUnit[MJ]; assert(SUJ && "Missing SUnit Info!"); + DEBUG(dbgs() << " Checking against MJ " << *MJ); // Is it legal to packetize SUI and SUJ together. - if (!this->isLegalToPacketizeTogether(SUI, SUJ)) { + if (!isLegalToPacketizeTogether(SUI, SUJ)) { + DEBUG(dbgs() << " Not legal to add MI, try to prune\n"); // Allow packetization if dependency can be pruned. - if (!this->isLegalToPruneDependencies(SUI, SUJ)) { + if (!isLegalToPruneDependencies(SUI, SUJ)) { // End the packet if dependency cannot be pruned. + DEBUG(dbgs() << " Could not prune dependencies for adding MI\n"); endPacket(MBB, MI); break; - } // !isLegalToPruneDependencies. - } // !isLegalToPacketizeTogether. - } // For all instructions in CurrentPacketMIs. + } + DEBUG(dbgs() << " Pruned dependence for adding MI\n"); + } + } } else { + DEBUG(if (ResourceAvail) + dbgs() << "Resources are available, but instruction should not be " + "added to packet\n " << MI); // End the packet if resource is not available, or if the instruction // should not be added to the current packet. endPacket(MBB, MI); } // Add MI to the current packet. - BeginItr = this->addToPacket(MI); - } // For all instructions in BB. + DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n'); + BeginItr = addToPacket(MI); + } // For all instructions in the packetization range. // End any packet left behind. endPacket(MBB, EndItr); VLIWScheduler->exitRegion(); VLIWScheduler->finishBlock(); } + + +// Add a DAG mutation object to the ordered list. +void VLIWPacketizerList::addMutation( + std::unique_ptr<ScheduleDAGMutation> Mutation) { + VLIWScheduler->addMutation(std::move(Mutation)); +} diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index b11b497..0b8dc7a 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -42,6 +42,11 @@ namespace { initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry()); } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + private: bool isDead(const MachineInstr *MI) const; }; @@ -90,7 +95,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { } bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { - if (skipOptnoneFunction(*MF.getFunction())) + if (skipFunction(*MF.getFunction())) return false; bool AnyChanges = false; diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp new file mode 100644 index 0000000..1d9e79c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -0,0 +1,602 @@ +//===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Analysis that tracks defined/used subregister lanes across COPY instructions +/// and instructions that get lowered to a COPY (PHI, REG_SEQUENCE, +/// INSERT_SUBREG, EXTRACT_SUBREG).
+/// The information is used to detect dead definitions and the usage of +/// (completely) undefined values and mark the operands as such. +/// This pass is necessary because the dead/undef status is not obvious anymore +/// when subregisters are involved. +/// +/// Example: +/// %vreg0 = some definition +/// %vreg1 = IMPLICIT_DEF +/// %vreg2 = REG_SEQUENCE %vreg0, sub0, %vreg1, sub1 +/// %vreg3 = EXTRACT_SUBREG %vreg2, sub1 +/// = use %vreg3 +/// The %vreg0 definition is dead and %vreg3 contains an undefined value. +// +//===----------------------------------------------------------------------===// + +#include <deque> +#include <vector> + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "detect-dead-lanes" + +namespace { + +/// Contains a bitmask of which lanes of a given virtual register are +/// defined and which ones are actually used. +struct VRegInfo { + LaneBitmask UsedLanes; + LaneBitmask DefinedLanes; +}; + +class DetectDeadLanes : public MachineFunctionPass { +public: + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + DetectDeadLanes() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { return "Detect Dead Lanes"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + /// Add used lane bits on the register used by operand \p MO. This translates + /// the bitmask based on the operand's subregister, and puts the register into + /// the worklist if any new bits were added. + void addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes); + + /// Given a bitmask \p UsedLanes for the used lanes on a def output of a + /// COPY-like instruction determine the lanes used on the use operands + /// and call addUsedLanesOnOperand() for them. + void transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes); + + /// Given a use register operand \p Use and a mask of defined lanes, check + /// if the operand belongs to a lowersToCopies() instruction, transfer the + /// mask to the def and put the instruction into the worklist. + void transferDefinedLanesStep(const MachineOperand &Use, + LaneBitmask DefinedLanes); + + /// Given a mask \p DefinedLanes of lanes defined at operand \p OpNum + /// of a COPY-like instruction, determine which lanes are defined at the output + /// operand \p Def. + LaneBitmask transferDefinedLanes(const MachineOperand &Def, unsigned OpNum, + LaneBitmask DefinedLanes) const; + + /// Given a mask \p UsedLanes used from the output of instruction \p MI + /// determine which lanes are used from operand \p MO of this instruction.
+ LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes, + const MachineOperand &MO) const; + + bool runOnce(MachineFunction &MF); + + LaneBitmask determineInitialDefinedLanes(unsigned Reg); + LaneBitmask determineInitialUsedLanes(unsigned Reg); + + bool isUndefRegAtInput(const MachineOperand &MO, + const VRegInfo &RegInfo) const; + + bool isUndefInput(const MachineOperand &MO, bool *CrossCopy) const; + + const MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + + void PutInWorklist(unsigned RegIdx) { + if (WorklistMembers.test(RegIdx)) + return; + WorklistMembers.set(RegIdx); + Worklist.push_back(RegIdx); + } + + VRegInfo *VRegInfos; + /// Worklist containing virtreg indexes. + std::deque<unsigned> Worklist; + BitVector WorklistMembers; + /// This bitvector is set for each vreg index where the vreg is defined + /// by an instruction where lowersToCopies()==true. + BitVector DefinedByCopy; +}; + +} // end anonymous namespace + +char DetectDeadLanes::ID = 0; +char &llvm::DetectDeadLanesID = DetectDeadLanes::ID; + +INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes", + false, false) + +/// Returns true if \p MI will get lowered to a series of COPY instructions. +/// We call this a COPY-like instruction. +static bool lowersToCopies(const MachineInstr &MI) { + // Note: We could support instructions with MCInstrDesc::isRegSequenceLike(), + // isExtractSubRegLike(), isInsertSubregLike() in the future even though they + // are not lowered to a COPY. + switch (MI.getOpcode()) { + case TargetOpcode::COPY: + case TargetOpcode::PHI: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::EXTRACT_SUBREG: + return true; + } + return false; +} + +static bool isCrossCopy(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + const TargetRegisterClass *DstRC, + const MachineOperand &MO) { + assert(lowersToCopies(MI)); + unsigned SrcReg = MO.getReg(); + const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); + if (DstRC == SrcRC) + return false; + + unsigned SrcSubIdx = MO.getSubReg(); + + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + unsigned DstSubIdx = 0; + switch (MI.getOpcode()) { + case TargetOpcode::INSERT_SUBREG: + if (MI.getOperandNo(&MO) == 2) + DstSubIdx = MI.getOperand(3).getImm(); + break; + case TargetOpcode::REG_SEQUENCE: { + unsigned OpNum = MI.getOperandNo(&MO); + DstSubIdx = MI.getOperand(OpNum+1).getImm(); + break; + } + case TargetOpcode::EXTRACT_SUBREG: { + unsigned SubReg = MI.getOperand(2).getImm(); + SrcSubIdx = TRI.composeSubRegIndices(SubReg, SrcSubIdx); + } + } + + unsigned PreA, PreB; // Unused. 
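// The copy stays within one register bank only if the class constraints on both sides can be satisfied simultaneously; the checks below report a cross-copy when they cannot.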
+ if (SrcSubIdx && DstSubIdx) + return !TRI.getCommonSuperRegClass(SrcRC, SrcSubIdx, DstRC, DstSubIdx, PreA, + PreB); + if (SrcSubIdx) + return !TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSubIdx); + if (DstSubIdx) + return !TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSubIdx); + return !TRI.getCommonSubClass(SrcRC, DstRC); +} + +void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO, + LaneBitmask UsedLanes) { + if (!MO.readsReg()) + return; + unsigned MOReg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(MOReg)) + return; + + unsigned MOSubReg = MO.getSubReg(); + if (MOSubReg != 0) + UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes); + UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg); + + unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg); + VRegInfo &MORegInfo = VRegInfos[MORegIdx]; + LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes; + // Any change at all? + if ((UsedLanes & ~PrevUsedLanes) == 0) + return; + + // Set UsedLanes and remember instruction for further propagation. + MORegInfo.UsedLanes = PrevUsedLanes | UsedLanes; + if (DefinedByCopy.test(MORegIdx)) + PutInWorklist(MORegIdx); +} + +void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI, + LaneBitmask UsedLanes) { + for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO); + addUsedLanesOnOperand(MO, UsedOnMO); + } +} + +LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI, + LaneBitmask UsedLanes, + const MachineOperand &MO) const { + unsigned OpNum = MI.getOperandNo(&MO); + assert(lowersToCopies(MI) && DefinedByCopy[ + TargetRegisterInfo::virtReg2Index(MI.getOperand(0).getReg())]); + + switch (MI.getOpcode()) { + case TargetOpcode::COPY: + case TargetOpcode::PHI: + return UsedLanes; + case TargetOpcode::REG_SEQUENCE: { + assert(OpNum % 2 == 1); + unsigned SubIdx = MI.getOperand(OpNum + 1).getImm(); + return TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes); + } + case TargetOpcode::INSERT_SUBREG: { + unsigned SubIdx = MI.getOperand(3).getImm(); + LaneBitmask MO2UsedLanes = + TRI->reverseComposeSubRegIndexLaneMask(SubIdx, UsedLanes); + if (OpNum == 2) + return MO2UsedLanes; + + const MachineOperand &Def = MI.getOperand(0); + unsigned DefReg = Def.getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + LaneBitmask MO1UsedLanes; + if (RC->CoveredBySubRegs) + MO1UsedLanes = UsedLanes & ~TRI->getSubRegIndexLaneMask(SubIdx); + else + MO1UsedLanes = RC->LaneMask; + + assert(OpNum == 1); + return MO1UsedLanes; + } + case TargetOpcode::EXTRACT_SUBREG: { + assert(OpNum == 1); + unsigned SubIdx = MI.getOperand(2).getImm(); + return TRI->composeSubRegIndexLaneMask(SubIdx, UsedLanes); + } + default: + llvm_unreachable("function must be called with COPY-like instruction"); + } +} + +void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use, + LaneBitmask DefinedLanes) { + if (!Use.readsReg()) + return; + // Check whether the operand writes a vreg and is part of a COPY-like + // instruction. + const MachineInstr &MI = *Use.getParent(); + if (MI.getDesc().getNumDefs() != 1) + return; + // FIXME: PATCHPOINT instructions announce a Def that does not always exist, + // they really need to be modeled differently! 
+ if (MI.getOpcode() == TargetOpcode::PATCHPOINT) + return; + const MachineOperand &Def = *MI.defs().begin(); + unsigned DefReg = Def.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + return; + unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg); + if (!DefinedByCopy.test(DefRegIdx)) + return; + + unsigned OpNum = MI.getOperandNo(&Use); + DefinedLanes = + TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes); + DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes); + + VRegInfo &RegInfo = VRegInfos[DefRegIdx]; + LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes; + // Any change at all? + if ((DefinedLanes & ~PrevDefinedLanes) == 0) + return; + + RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes; + PutInWorklist(DefRegIdx); +} + +LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def, + unsigned OpNum, LaneBitmask DefinedLanes) const { + const MachineInstr &MI = *Def.getParent(); + // Translate DefinedLanes if necessary. + switch (MI.getOpcode()) { + case TargetOpcode::REG_SEQUENCE: { + unsigned SubIdx = MI.getOperand(OpNum + 1).getImm(); + DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes); + DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx); + break; + } + case TargetOpcode::INSERT_SUBREG: { + unsigned SubIdx = MI.getOperand(3).getImm(); + if (OpNum == 2) { + DefinedLanes = TRI->composeSubRegIndexLaneMask(SubIdx, DefinedLanes); + DefinedLanes &= TRI->getSubRegIndexLaneMask(SubIdx); + } else { + assert(OpNum == 1 && "INSERT_SUBREG must have two operands"); + // Ignore lanes defined by operand 2. + DefinedLanes &= ~TRI->getSubRegIndexLaneMask(SubIdx); + } + break; + } + case TargetOpcode::EXTRACT_SUBREG: { + unsigned SubIdx = MI.getOperand(2).getImm(); + assert(OpNum == 1 && "EXTRACT_SUBREG must have one register operand only"); + DefinedLanes = TRI->reverseComposeSubRegIndexLaneMask(SubIdx, DefinedLanes); + break; + } + case TargetOpcode::COPY: + case TargetOpcode::PHI: + break; + default: + llvm_unreachable("function must be called with COPY-like instruction"); + } + + assert(Def.getSubReg() == 0 && + "Should not have subregister defs in machine SSA phase"); + DefinedLanes &= MRI->getMaxLaneMaskForVReg(Def.getReg()); + return DefinedLanes; +} + +LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) { + // Live-In or unused registers have no definition but are considered fully + // defined. + if (!MRI->hasOneDef(Reg)) + return ~0u; + + const MachineOperand &Def = *MRI->def_begin(Reg); + const MachineInstr &DefMI = *Def.getParent(); + if (lowersToCopies(DefMI)) { + // Start optimistically with no used or defined lanes for copy + // instructions. The following dataflow analysis will add more bits. + unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + DefinedByCopy.set(RegIdx); + PutInWorklist(RegIdx); + + if (Def.isDead()) + return 0; + + // COPY/PHI can copy across unrelated register classes (example: float/int) + // with incompatible subregister structure. Do not include these in the + // dataflow analysis since we cannot transfer lanemasks in a meaningful way. + const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); + + // Determine the initial DefinedLanes.
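// Each use operand of the COPY-like def contributes lanes to the result; physical registers and cross-copies are conservatively treated as fully defined below.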
+ LaneBitmask DefinedLanes = 0; + for (const MachineOperand &MO : DefMI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + + LaneBitmask MODefinedLanes; + if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + MODefinedLanes = ~0u; + } else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) { + MODefinedLanes = ~0u; + } else { + assert(TargetRegisterInfo::isVirtualRegister(MOReg)); + if (MRI->hasOneDef(MOReg)) { + const MachineOperand &MODef = *MRI->def_begin(MOReg); + const MachineInstr &MODefMI = *MODef.getParent(); + // Bits from copy-like operations will be added later. + if (lowersToCopies(MODefMI) || MODefMI.isImplicitDef()) + continue; + } + unsigned MOSubReg = MO.getSubReg(); + MODefinedLanes = MRI->getMaxLaneMaskForVReg(MOReg); + MODefinedLanes = TRI->reverseComposeSubRegIndexLaneMask( + MOSubReg, MODefinedLanes); + } + + unsigned OpNum = DefMI.getOperandNo(&MO); + DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes); + } + return DefinedLanes; + } + if (DefMI.isImplicitDef() || Def.isDead()) + return 0; + + assert(Def.getSubReg() == 0 && + "Should not have subregister defs in machine SSA phase"); + return MRI->getMaxLaneMaskForVReg(Reg); +} + +LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) { + LaneBitmask UsedLanes = 0; + for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + if (!MO.readsReg()) + continue; + + const MachineInstr &UseMI = *MO.getParent(); + if (UseMI.isKill()) + continue; + + unsigned SubReg = MO.getSubReg(); + if (lowersToCopies(UseMI)) { + assert(UseMI.getDesc().getNumDefs() == 1); + const MachineOperand &Def = *UseMI.defs().begin(); + unsigned DefReg = Def.getReg(); + // The used lanes of COPY-like instruction operands are determined by the + // following dataflow analysis. + if (TargetRegisterInfo::isVirtualRegister(DefReg)) { + // But ignore copies across incompatible register classes. + bool CrossCopy = false; + if (lowersToCopies(UseMI)) { + const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg); + CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO); + if (CrossCopy) + DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI); + } + + if (!CrossCopy) + continue; + } + } + + // Shortcut: All lanes are used.
+ if (SubReg == 0) + return MRI->getMaxLaneMaskForVReg(Reg); + + UsedLanes |= TRI->getSubRegIndexLaneMask(SubReg); + } + return UsedLanes; +} + +bool DetectDeadLanes::isUndefRegAtInput(const MachineOperand &MO, + const VRegInfo &RegInfo) const { + unsigned SubReg = MO.getSubReg(); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); + return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask) == 0; +} + +bool DetectDeadLanes::isUndefInput(const MachineOperand &MO, + bool *CrossCopy) const { + if (!MO.isUse()) + return false; + const MachineInstr &MI = *MO.getParent(); + if (!lowersToCopies(MI)) + return false; + const MachineOperand &Def = MI.getOperand(0); + unsigned DefReg = Def.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DefReg)) + return false; + unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg); + if (!DefinedByCopy.test(DefRegIdx)) + return false; + + const VRegInfo &DefRegInfo = VRegInfos[DefRegIdx]; + LaneBitmask UsedLanes = transferUsedLanes(MI, DefRegInfo.UsedLanes, MO); + if (UsedLanes != 0) + return false; + + unsigned MOReg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(MOReg)) { + const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg); + *CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO); + } + return true; +} + +bool DetectDeadLanes::runOnce(MachineFunction &MF) { + // First pass: Populate defs/uses of vregs with initial values + unsigned NumVirtRegs = MRI->getNumVirtRegs(); + for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + + // Determine used/defined lanes and add copy instructions to worklist. + VRegInfo &Info = VRegInfos[RegIdx]; + Info.DefinedLanes = determineInitialDefinedLanes(Reg); + Info.UsedLanes = determineInitialUsedLanes(Reg); + } + + // Iterate as long as defined lanes/used lanes keep changing. + while (!Worklist.empty()) { + unsigned RegIdx = Worklist.front(); + Worklist.pop_front(); + WorklistMembers.reset(RegIdx); + VRegInfo &Info = VRegInfos[RegIdx]; + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + + // Transfer UsedLanes to operands of DefMI (backwards dataflow). + MachineOperand &Def = *MRI->def_begin(Reg); + const MachineInstr &MI = *Def.getParent(); + transferUsedLanesStep(MI, Info.UsedLanes); + // Transfer DefinedLanes to users of Reg (forward dataflow). + for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) + transferDefinedLanesStep(MO, Info.DefinedLanes); + } + + DEBUG( + dbgs() << "Defined/Used lanes:\n"; + for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx); + const VRegInfo &Info = VRegInfos[RegIdx]; + dbgs() << PrintReg(Reg, nullptr) + << " Used: " << PrintLaneMask(Info.UsedLanes) + << " Def: " << PrintLaneMask(Info.DefinedLanes) << '\n'; + } + dbgs() << "\n"; + ); + + bool Again = false; + // Mark operands as dead/unused. 
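// Marking a cross-copy input undef can expose further dead lanes, so the caller reruns the analysis whenever Again is set below.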
+ for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg); + const VRegInfo &RegInfo = VRegInfos[RegIdx]; + if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes == 0) { + DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI); + MO.setIsDead(); + } + if (MO.readsReg()) { + bool CrossCopy = false; + if (isUndefRegAtInput(MO, RegInfo)) { + DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " + << MI); + MO.setIsUndef(); + } else if (isUndefInput(MO, &CrossCopy)) { + DEBUG(dbgs() << "Marking operand '" << MO << "' as undef in " + << MI); + MO.setIsUndef(); + if (CrossCopy) + Again = true; + } + } + } + } + } + + return Again; +} + +bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) { + // Don't bother if we won't track subregister liveness later. This pass is + // required for correctness if subregister liveness is enabled because the + // register coalescer cannot deal with hidden dead defs. However without + // subregister liveness enabled, the expected benefits of this pass are small + // so we save compile time. + if (!MF.getSubtarget().enableSubRegLiveness()) { + DEBUG(dbgs() << "Skipping Detect dead lanes pass\n"); + return false; + } + + MRI = &MF.getRegInfo(); + TRI = MRI->getTargetRegisterInfo(); + + unsigned NumVirtRegs = MRI->getNumVirtRegs(); + VRegInfos = new VRegInfo[NumVirtRegs]; + WorklistMembers.resize(NumVirtRegs); + DefinedByCopy.resize(NumVirtRegs); + + bool Again; + do { + Again = runOnce(MF); + } while (Again); + + DefinedByCopy.clear(); + WorklistMembers.clear(); + delete[] VRegInfos; + return true; +} diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index f3536d7..8c96124 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -278,7 +278,7 @@ bool SSAIfConv::findInsertionPoint() { while (I != B) { --I; // Some of the conditional code depends on I. - if (InsertAfter.count(I)) { + if (InsertAfter.count(&*I)) { DEBUG(dbgs() << "Can't insert code after " << *I); return false; } @@ -386,7 +386,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { // The branch we're looking to eliminate must be analyzable. Cond.clear(); - if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) { + if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) { DEBUG(dbgs() << "Branch not analyzable.\n"); return false; } @@ -480,7 +480,7 @@ void SSAIfConv::rewritePHIOperands() { for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { PHIInfo &PI = PHIs[i]; unsigned DstReg = 0; - + DEBUG(dbgs() << "If-converting " << *PI.PHI); if (PI.TReg == PI.FReg) { // We do not need the select instruction if both incoming values are @@ -718,7 +718,7 @@ bool EarlyIfConverter::shouldConvertIf() { // TBB / FBB data dependencies may delay the select even more.
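// The cost model below weighs the depth of the branch being removed against each PHI's slack; speculation is rejected when the added depth exceeds the available slack by too much.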
MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); unsigned BranchDepth = - HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth; + HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth; DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); // Look at all the tail phis, and compute the critical path extension caused @@ -726,8 +726,8 @@ bool EarlyIfConverter::shouldConvertIf() { MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; - unsigned Slack = TailTrace.getInstrSlack(PI.PHI); - unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; + unsigned Slack = TailTrace.getInstrSlack(*PI.PHI); + unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth; DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); // The condition is pulled into the critical path. @@ -742,7 +742,7 @@ bool EarlyIfConverter::shouldConvertIf() { } // The TBB value is pulled into the critical path. - unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles); + unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles); if (TDepth > MaxDepth) { unsigned Extra = TDepth - MaxDepth; DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); @@ -753,7 +753,7 @@ bool EarlyIfConverter::shouldConvertIf() { } // The FBB value is pulled into the critical path. - unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles); + unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles); if (FDepth > MaxDepth) { unsigned Extra = FDepth - MaxDepth; DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); @@ -785,6 +785,9 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" << "********** Function: " << MF.getName() << '\n'); + if (skipFunction(*MF.getFunction())) + return false; + // Only run if-conversion if the target wants it. const TargetSubtargetInfo &STI = MF.getSubtarget(); if (!STI.enableEarlyIfConversion()) diff --git a/contrib/llvm/lib/CodeGen/ErlangGC.cpp b/contrib/llvm/lib/CodeGen/ErlangGC.cpp deleted file mode 100644 index 024946d..0000000 --- a/contrib/llvm/lib/CodeGen/ErlangGC.cpp +++ /dev/null @@ -1,46 +0,0 @@ -//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Erlang/OTP runtime-compatible garbage collector -// (e.g. defines safe points, root initialization etc.) -// -// The frametable emitter is in ErlangGCPrinter.cpp.
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/GCs.h" -#include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -using namespace llvm; - -namespace { - -class ErlangGC : public GCStrategy { -public: - ErlangGC(); -}; -} - -static GCRegistry::Add<ErlangGC> X("erlang", - "erlang-compatible garbage collector"); - -void llvm::linkErlangGC() {} - -ErlangGC::ErlangGC() { - InitRoots = false; - NeededSafePoints = 1 << GC::PostCall; - UsesMetadata = true; - CustomRoots = false; -} diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index c550008..566b8d5 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -168,6 +168,11 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + const char *getPassName() const override { return "Execution dependency fix"; } @@ -315,7 +320,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { // Collapse all the instructions. while (!dv->Instrs.empty()) - TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain); + TII->setExecutionDomain(*dv->Instrs.pop_back_val(), domain); dv->setSingleDomain(domain); // If there are multiple users, give them new, unique DomainValues. @@ -455,7 +460,7 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { return; // Update instructions with explicit execution domains. - std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI); + std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI); if (DomP.first) { if (DomP.second) visitSoftInstr(MI, DomP.second); @@ -503,7 +508,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { // Break dependence on undef uses. Do this before updating LiveRegs below. unsigned OpNum; - unsigned Pref = TII->getUndefRegClearance(MI, OpNum, TRI); + unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); if (Pref) { if (shouldBreakDependence(MI, OpNum, Pref)) UndefReads.push_back(std::make_pair(MI, OpNum)); @@ -526,9 +531,9 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { // Check clearance before partial register updates. // Call breakDependence before setting LiveRegs[rx].Def. - unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); if (Pref && shouldBreakDependence(MI, i, Pref)) - TII->breakPartialRegDependency(MI, i, TRI); + TII->breakPartialRegDependency(*MI, i, TRI); // How many instructions since rx was last written? LiveRegs[rx].Def = CurInstr; @@ -553,7 +558,9 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { // Collect this block's live out register units. LiveRegSet.init(TRI); - LiveRegSet.addLiveOuts(MBB); + // We do not need to care about pristine registers as they are just preserved + // but not actually used in the function. 
+ LiveRegSet.addLiveOutsNoPristines(*MBB); MachineInstr *UndefMI = UndefReads.back().first; unsigned OpIdx = UndefReads.back().second; @@ -564,7 +571,7 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { if (UndefMI == &I) { if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) - TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); + TII->breakPartialRegDependency(*UndefMI, OpIdx, TRI); UndefReads.pop_back(); if (UndefReads.empty()) @@ -638,7 +645,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // If the collapsed operands force a single domain, propagate the collapse. if (isPowerOf2_32(available)) { unsigned domain = countTrailingZeros(available); - TII->setExecutionDomain(mi, domain); + TII->setExecutionDomain(*mi, domain); visitHardInstr(mi, domain); return; } @@ -719,6 +726,8 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { } bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(*mf.getFunction())) + return false; MF = &mf; TII = MF->getSubtarget().getInstrInfo(); TRI = MF->getSubtarget().getRegisterInfo(); diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp index 90ddac9..0ec79c2 100644 --- a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp @@ -53,13 +53,12 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *MBB = &*I; for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE; ) { - MachineInstr *MI = MBBI++; + MachineInstr &MI = *MBBI++; // If MI is a pseudo, expand it. - if (MI->usesCustomInsertionHook()) { + if (MI.usesCustomInsertionHook()) { Changed = true; - MachineBasicBlock *NewMBB = - TLI->EmitInstrWithCustomInserter(MI, MBB); + MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); // The expansion may involve new basic blocks. if (NewMBB != MBB) { MBB = NewMBB; diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index e7bf143..ab2382e 100644 --- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -51,7 +51,7 @@ private: bool LowerSubregToReg(MachineInstr *MI); bool LowerCopy(MachineInstr *MI); - void TransferImplicitDefs(MachineInstr *MI); + void TransferImplicitOperands(MachineInstr *MI); }; } // end anonymous namespace @@ -61,20 +61,16 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; INITIALIZE_PASS(ExpandPostRA, "postrapseudos", "Post-RA pseudo instruction expansion pass", false, false) -/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered -/// replacement instructions immediately precede it. Copy any implicit-def +/// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered +/// replacement instructions immediately precede it. Copy any implicit /// operands from MI to the replacement instruction. 
-void -ExpandPostRA::TransferImplicitDefs(MachineInstr *MI) { +void ExpandPostRA::TransferImplicitOperands(MachineInstr *MI) { MachineBasicBlock::iterator CopyMI = MI; --CopyMI; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isImplicit() || MO.isUse()) - continue; - CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true)); - } + for (const MachineOperand &MO : MI->implicit_operands()) + if (MO.isReg()) + CopyMI->addOperand(MO); } bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { @@ -167,7 +163,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill()); if (MI->getNumOperands() > 2) - TransferImplicitDefs(MI); + TransferImplicitOperands(MI); DEBUG({ MachineBasicBlock::iterator dMI = MI; dbgs() << "replaced by: " << *(--dMI); @@ -192,12 +188,12 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { mbbi != mbbe; ++mbbi) { for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me;) { - MachineInstr *MI = mi; + MachineInstr &MI = *mi; // Advance iterator here because MI may be erased. ++mi; // Only expand pseudos. - if (!MI->isPseudo()) + if (!MI.isPseudo()) continue; // Give targets a chance to expand even standard pseudos. @@ -207,12 +203,12 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { } // Expand standard pseudos. - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { case TargetOpcode::SUBREG_TO_REG: - MadeChange |= LowerSubregToReg(MI); + MadeChange |= LowerSubregToReg(&MI); break; case TargetOpcode::COPY: - MadeChange |= LowerCopy(MI); + MadeChange |= LowerCopy(&MI); break; case TargetOpcode::DBG_VALUE: continue; diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp index 8b2f505..b16f81c 100644 --- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp +++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp @@ -28,6 +28,10 @@ public: } bool runOnMachineFunction(MachineFunction &F) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } }; } diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp index 484d317..326adab 100644 --- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp @@ -64,7 +64,7 @@ class GCMachineCodeAnalysis : public MachineFunctionPass { void FindSafePoints(MachineFunction &MF); void VisitCallPoint(MachineBasicBlock::iterator MI); MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc DL) const; + const DebugLoc &DL) const; void FindStackOffsets(MachineFunction &MF); @@ -170,8 +170,7 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots, for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) if (!InitedRoots.count(*I)) { StoreInst *SI = new StoreInst( - ConstantPointerNull::get(cast<PointerType>( - cast<PointerType>((*I)->getType())->getElementType())), + ConstantPointerNull::get(cast<PointerType>((*I)->getAllocatedType())), *I); SI->insertAfter(*I); MadeChange = true; @@ -271,7 +270,7 @@ void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc DL) const { + const DebugLoc &DL) const { MCSymbol *Label = 
MBB.getParent()->getContext().createTempSymbol(); BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); return Label; diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp new file mode 100644 index 0000000..231e5ac --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -0,0 +1,30 @@ +//===-- llvm/CodeGen/GlobalISel/GlobalIsel.cpp --- GlobalISel ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +// This file implements the common initialization routines for the +// GlobalISel library. +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm/PassRegistry.h" + +using namespace llvm; + +#ifndef LLVM_BUILD_GLOBAL_ISEL + +void llvm::initializeGlobalISel(PassRegistry &Registry) { +} + +#else + +void llvm::initializeGlobalISel(PassRegistry &Registry) { + initializeIRTranslatorPass(Registry); + initializeRegBankSelectPass(Registry); +} +#endif // LLVM_BUILD_GLOBAL_ISEL diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp new file mode 100644 index 0000000..b8a960c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -0,0 +1,164 @@ +//===-- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the IRTranslator class. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Target/TargetLowering.h" + +#define DEBUG_TYPE "irtranslator" + +using namespace llvm; + +char IRTranslator::ID = 0; +INITIALIZE_PASS(IRTranslator, "irtranslator", "IRTranslator LLVM IR -> MI", + false, false); + +IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) { + initializeIRTranslatorPass(*PassRegistry::getPassRegistry()); +} + +unsigned IRTranslator::getOrCreateVReg(const Value &Val) { + unsigned &ValReg = ValToVReg[&Val]; + // Check if this is the first time we see Val. + if (!ValReg) { + // Fill ValRegsSequence with the sequence of registers + // we need to concat together to produce the value. 
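// Aggregates and constants are not handled yet: a single generic vreg, sized to the value's primitive type, is created on first reference.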
+ assert(Val.getType()->isSized() && + "Don't know how to create an empty vreg"); + assert(!Val.getType()->isAggregateType() && "Not yet implemented"); + unsigned Size = Val.getType()->getPrimitiveSizeInBits(); + unsigned VReg = MRI->createGenericVirtualRegister(Size); + ValReg = VReg; + assert(!isa<Constant>(Val) && "Not yet implemented"); + } + return ValReg; +} + +MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) { + MachineBasicBlock *&MBB = BBToMBB[&BB]; + if (!MBB) { + MachineFunction &MF = MIRBuilder.getMF(); + MBB = MF.CreateMachineBasicBlock(); + MF.push_back(MBB); + } + return *MBB; +} + +bool IRTranslator::translateBinaryOp(unsigned Opcode, const Instruction &Inst) { + // Get or create a virtual register for each value. + // Unless the value is a Constant => loadimm cst? + // or inline constant each time? + // Creation of a virtual register needs to have a size. + unsigned Op0 = getOrCreateVReg(*Inst.getOperand(0)); + unsigned Op1 = getOrCreateVReg(*Inst.getOperand(1)); + unsigned Res = getOrCreateVReg(Inst); + MIRBuilder.buildInstr(Opcode, Inst.getType(), Res, Op0, Op1); + return true; +} + +bool IRTranslator::translateReturn(const Instruction &Inst) { + assert(isa<ReturnInst>(Inst) && "Return expected"); + const Value *Ret = cast<ReturnInst>(Inst).getReturnValue(); + // The target may mess up with the insertion point, but + // this is not important as a return is the last instruction + // of the block anyway. + return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret)); +} + +bool IRTranslator::translateBr(const Instruction &Inst) { + assert(isa<BranchInst>(Inst) && "Branch expected"); + const BranchInst &BrInst = *cast<BranchInst>(&Inst); + if (BrInst.isUnconditional()) { + const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getOperand(0)); + MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt); + MIRBuilder.buildInstr(TargetOpcode::G_BR, BrTgt.getType(), TgtBB); + } else { + assert(0 && "Not yet implemented"); + } + // Link successors. + MachineBasicBlock &CurBB = MIRBuilder.getMBB(); + for (const BasicBlock *Succ : BrInst.successors()) + CurBB.addSuccessor(&getOrCreateBB(*Succ)); + return true; +} + +bool IRTranslator::translate(const Instruction &Inst) { + MIRBuilder.setDebugLoc(Inst.getDebugLoc()); + switch(Inst.getOpcode()) { + case Instruction::Add: + return translateBinaryOp(TargetOpcode::G_ADD, Inst); + case Instruction::Or: + return translateBinaryOp(TargetOpcode::G_OR, Inst); + case Instruction::Br: + return translateBr(Inst); + case Instruction::Ret: + return translateReturn(Inst); + + default: + llvm_unreachable("Opcode not supported"); + } +} + + +void IRTranslator::finalize() { + // Release the memory used by the different maps we + // needed during the translation. + ValToVReg.clear(); + Constants.clear(); +} + +bool IRTranslator::runOnMachineFunction(MachineFunction &MF) { + const Function &F = *MF.getFunction(); + if (F.empty()) + return false; + CLI = MF.getSubtarget().getCallLowering(); + MIRBuilder.setMF(MF); + MRI = &MF.getRegInfo(); + // Setup the arguments. 
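// Formal arguments are lowered into vregs in the entry block via the target's CallLowering; each basic block's instructions are then translated in order.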
+ MachineBasicBlock &MBB = getOrCreateBB(F.front()); + MIRBuilder.setMBB(MBB); + SmallVector<unsigned, 8> VRegArgs; + for (const Argument &Arg: F.args()) + VRegArgs.push_back(getOrCreateVReg(Arg)); + bool Succeeded = + CLI->lowerFormalArguments(MIRBuilder, F.getArgumentList(), VRegArgs); + if (!Succeeded) + report_fatal_error("Unable to lower arguments"); + + for (const BasicBlock &BB: F) { + MachineBasicBlock &MBB = getOrCreateBB(BB); + // Set the insertion point of all the following translations to + // the end of this basic block. + MIRBuilder.setMBB(MBB); + for (const Instruction &Inst: BB) { + bool Succeeded = translate(Inst); + if (!Succeeded) { + DEBUG(dbgs() << "Cannot translate: " << Inst << '\n'); + report_fatal_error("Unable to translate instruction"); + } + } + } + + // Now that the MachineFrameInfo has been configured, no further changes to + // the reserved registers are possible. + MRI->freezeReservedRegs(MF); + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp new file mode 100644 index 0000000..2f19bcf --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -0,0 +1,104 @@ +//===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.cpp - MIBuilder--*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the MachineIRBuilder class. +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +void MachineIRBuilder::setMF(MachineFunction &MF) { + this->MF = &MF; + this->MBB = nullptr; + this->TII = MF.getSubtarget().getInstrInfo(); + this->DL = DebugLoc(); + this->MI = nullptr; +} + +void MachineIRBuilder::setMBB(MachineBasicBlock &MBB, bool Beginning) { + this->MBB = &MBB; + Before = Beginning; + assert(&getMF() == MBB.getParent() && + "Basic block is in a different function"); +} + +void MachineIRBuilder::setInstr(MachineInstr &MI, bool Before) { + assert(MI.getParent() && "Instruction is not part of a basic block"); + setMBB(*MI.getParent()); + this->MI = &MI; + this->Before = Before; +} + +MachineBasicBlock::iterator MachineIRBuilder::getInsertPt() { + if (MI) { + if (Before) + return MI; + if (!MI->getNextNode()) + return getMBB().end(); + return MI->getNextNode(); + } + return Before ? getMBB().begin() : getMBB().end(); +} + +//------------------------------------------------------------------------------ +// Build instruction variants.
+//------------------------------------------------------------------------------ +MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty) { + MachineInstr *NewMI = BuildMI(getMF(), DL, getTII().get(Opcode)); + if (Ty) { + assert(isPreISelGenericOpcode(Opcode) && + "Only generic instruction can have a type"); + NewMI->setType(Ty); + } else + assert(!isPreISelGenericOpcode(Opcode) && + "Generic instruction must have a type"); + getMBB().insert(getInsertPt(), NewMI); + return NewMI; +} + +MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res, + unsigned Op0, unsigned Op1) { + return buildInstr(Opcode, nullptr, Res, Op0, Op1); +} + +MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty, + unsigned Res, unsigned Op0, + unsigned Op1) { + MachineInstr *NewMI = buildInstr(Opcode, Ty); + MachineInstrBuilder(getMF(), NewMI) + .addReg(Res, RegState::Define) + .addReg(Op0) + .addReg(Op1); + return NewMI; +} + +MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res, + unsigned Op0) { + MachineInstr *NewMI = buildInstr(Opcode, nullptr); + MachineInstrBuilder(getMF(), NewMI).addReg(Res, RegState::Define).addReg(Op0); + return NewMI; +} + +MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode) { + return buildInstr(Opcode, nullptr); +} + +MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty, + MachineBasicBlock &BB) { + MachineInstr *NewMI = buildInstr(Opcode, Ty); + MachineInstrBuilder(getMF(), NewMI).addMBB(&BB); + return NewMI; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp new file mode 100644 index 0000000..419e270 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -0,0 +1,897 @@ +//===- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the RegBankSelect class. 
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "regbankselect"
+
+using namespace llvm;
+
+static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
+    cl::desc("Mode of the RegBankSelect pass"), cl::Hidden, cl::Optional,
+    cl::values(clEnumValN(RegBankSelect::Mode::Fast, "regbankselect-fast",
+                          "Run the Fast mode (default mapping)"),
+               clEnumValN(RegBankSelect::Mode::Greedy, "regbankselect-greedy",
+                          "Use the Greedy mode (best local mapping)"),
+               clEnumValEnd));
+
+char RegBankSelect::ID = 0;
+INITIALIZE_PASS_BEGIN(RegBankSelect, "regbankselect",
+                      "Assign register bank of generic virtual registers",
+                      false, false);
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(RegBankSelect, "regbankselect",
+                    "Assign register bank of generic virtual registers", false,
+                    false);
+
+RegBankSelect::RegBankSelect(Mode RunningMode)
+    : MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr),
+      MBFI(nullptr), MBPI(nullptr), OptMode(RunningMode) {
+  initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
+  if (RegBankSelectMode.getNumOccurrences() != 0) {
+    OptMode = RegBankSelectMode;
+    if (RegBankSelectMode != RunningMode)
+      DEBUG(dbgs() << "RegBankSelect mode overridden on the command line\n");
+  }
+}
+
+void RegBankSelect::init(MachineFunction &MF) {
+  RBI = MF.getSubtarget().getRegBankInfo();
+  assert(RBI && "Cannot work without RegisterBankInfo");
+  MRI = &MF.getRegInfo();
+  TRI = MF.getSubtarget().getRegisterInfo();
+  if (OptMode != Mode::Fast) {
+    MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+    MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+  } else {
+    MBFI = nullptr;
+    MBPI = nullptr;
+  }
+  MIRBuilder.setMF(MF);
+}
+
+void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
+  if (OptMode != Mode::Fast) {
+    // We could preserve the information from these two analyses but
+    // the APIs do not allow us to do so yet.
+    AU.addRequired<MachineBlockFrequencyInfo>();
+    AU.addRequired<MachineBranchProbabilityInfo>();
+  }
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RegBankSelect::assignmentMatch(
+    unsigned Reg, const RegisterBankInfo::ValueMapping &ValMapping,
+    bool &OnlyAssign) const {
+  // By default we assume we will have to repair something.
+  OnlyAssign = false;
+  // Each part of a breakdown needs to end up in a different register.
+  // In other words, Reg's assignment does not match.
+  if (ValMapping.BreakDown.size() > 1)
+    return false;
+
+  const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
+  const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
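The decision this check feeds has three outcomes: the bank already matches, a bare assignment suffices because no bank is set yet, or repairing code is needed (a mismatch, or a value broken into several parts). A small standalone sketch of that decision, with illustrative types that are not the pass's API:

enum class BankAction { AlreadyMatches, JustAssign, NeedsRepair };

struct Bank { int ID; };

BankAction classify(const Bank *Current, const Bank *Desired,
                    unsigned NumPieces) {
  // A value broken into several pieces always needs repairing code.
  if (NumPieces > 1)
    return BankAction::NeedsRepair;
  if (!Current)
    return BankAction::JustAssign; // no bank yet: assignment suffices
  return Current == Desired ? BankAction::AlreadyMatches
                            : BankAction::NeedsRepair;
}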
+  // Reg has no assignment yet; a simple assignment will make the
+  // register bank match.
+  OnlyAssign = CurRegBank == nullptr;
+  DEBUG(dbgs() << "Does assignment already match: ";
+        if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none";
+        dbgs() << " against ";
+        assert(DesiredRegBank && "The mapping must be valid");
+        dbgs() << *DesiredRegBank << '\n';);
+  return CurRegBank == DesiredRegBank;
+}
+
+void RegBankSelect::repairReg(
+    MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
+    RegBankSelect::RepairingPlacement &RepairPt,
+    const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
+  assert(ValMapping.BreakDown.size() == 1 && "Not yet implemented");
+  // An empty range of new registers means no repairing.
+  assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair");
+
+  // Assume we are repairing a use and thus, the original reg will be
+  // the source of the repairing.
+  unsigned Src = MO.getReg();
+  unsigned Dst = *NewVRegs.begin();
+
+  // If we repair a definition, swap the source and destination for
+  // the repairing.
+  if (MO.isDef())
+    std::swap(Src, Dst);
+
+  assert((RepairPt.getNumInsertPoints() == 1 ||
+          TargetRegisterInfo::isPhysicalRegister(Dst)) &&
+         "We are about to create several defs for Dst");
+
+  // Build the instruction used to repair, then clone it at the right places.
+  MachineInstr *MI = MIRBuilder.buildInstr(TargetOpcode::COPY, Dst, Src);
+  MI->removeFromParent();
+  DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
+               << '\n');
+  // TODO:
+  // Check if MI is legal. If not, we need to legalize all the
+  // instructions we are going to insert.
+  std::unique_ptr<MachineInstr *[]> NewInstrs(
+      new MachineInstr *[RepairPt.getNumInsertPoints()]);
+  bool IsFirst = true;
+  unsigned Idx = 0;
+  for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
+    MachineInstr *CurMI;
+    if (IsFirst)
+      CurMI = MI;
+    else
+      CurMI = MIRBuilder.getMF().CloneMachineInstr(MI);
+    InsertPt->insert(*CurMI);
+    NewInstrs[Idx++] = CurMI;
+    IsFirst = false;
+  }
+  // TODO:
+  // Legalize NewInstrs if need be.
+}
+
+uint64_t RegBankSelect::getRepairCost(
+    const MachineOperand &MO,
+    const RegisterBankInfo::ValueMapping &ValMapping) const {
+  assert(MO.isReg() && "We should only repair register operands");
+  assert(!ValMapping.BreakDown.empty() && "Nothing to map??");
+
+  bool IsSameNumOfValues = ValMapping.BreakDown.size() == 1;
+  const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
+  // If MO does not have a register bank, we should have just been
+  // able to set one unless we have to break the value down.
+  assert((!IsSameNumOfValues || CurRegBank) && "We should not have to repair");
+  // Def: Val <- NewDefs
+  //   Same number of values: copy
+  //   Different number: Val = build_sequence Defs1, Defs2, ...
+  // Use: NewSources <- Val.
+  //   Same number of values: copy.
+  //   Different number: Src1, Src2, ... =
+  //     extract_value Val, Src1Begin, Src1Len, Src2Begin, Src2Len, ...
+  // We should remember that this value is available somewhere else to
+  // coalesce the value.
+
+  if (IsSameNumOfValues) {
+    const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
+    // If we repair a definition, swap the source and destination for
+    // the repairing.
+    if (MO.isDef())
+      std::swap(CurRegBank, DesiredRegBank);
+    // TODO: It may be possible to actually avoid the copy.
+    // If we repair something where the source is defined by a copy
+    // and the source of that copy is on the right bank, we can reuse
+    // it for free.
+    // E.g.,
+    // RegToRepair<BankA> = copy AlternativeSrc<BankB>
+    //                    = op RegToRepair<BankA>
+    // We can simply propagate AlternativeSrc instead of copying RegToRepair
+    // into a new virtual register.
+    // We would also need to propagate this information in the
+    // repairing placement.
+    unsigned Cost =
+        RBI->copyCost(*DesiredRegBank, *CurRegBank,
+                      RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI));
+    // TODO: use a dedicated constant for ImpossibleCost.
+    if (Cost != UINT_MAX)
+      return Cost;
+    assert(false && "Legalization not available yet");
+    // Return the legalization cost of that repairing.
+  }
+  assert(false && "Complex repairing not implemented yet");
+  return 1;
+}
+
+RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
+    MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings,
+    SmallVectorImpl<RepairingPlacement> &RepairPts) {
+
+  RegisterBankInfo::InstructionMapping *BestMapping = nullptr;
+  MappingCost Cost = MappingCost::ImpossibleCost();
+  SmallVector<RepairingPlacement, 4> LocalRepairPts;
+  for (RegisterBankInfo::InstructionMapping &CurMapping : PossibleMappings) {
+    MappingCost CurCost = computeMapping(MI, CurMapping, LocalRepairPts, &Cost);
+    if (CurCost < Cost) {
+      Cost = CurCost;
+      BestMapping = &CurMapping;
+      RepairPts.clear();
+      for (RepairingPlacement &RepairPt : LocalRepairPts)
+        RepairPts.emplace_back(std::move(RepairPt));
+    }
+  }
+  assert(BestMapping && "No suitable mapping for instruction");
+  return *BestMapping;
+}
+
+void RegBankSelect::tryAvoidingSplit(
+    RegBankSelect::RepairingPlacement &RepairPt, const MachineOperand &MO,
+    const RegisterBankInfo::ValueMapping &ValMapping) const {
+  const MachineInstr &MI = *MO.getParent();
+  assert(RepairPt.hasSplit() && "We should not have to adjust for split");
+  // Splitting should only occur for PHIs or between terminators,
+  // because we only do local repairing.
+  assert((MI.isPHI() || MI.isTerminator()) && "Why do we split?");
+
+  assert(&MI.getOperand(RepairPt.getOpIdx()) == &MO &&
+         "Repairing placement does not match operand");
+
+  // If we need splitting for phis, that means it is because we
+  // could not find an insertion point before the terminators of
+  // the predecessor block for this argument. In other words,
+  // the input value is defined by one of the terminators.
+  assert((!MI.isPHI() || !MO.isDef()) && "Need split for phi def?");
+
+  // We split to repair the use of a phi or a terminator.
+  if (!MO.isDef()) {
+    if (MI.isTerminator()) {
+      assert(&MI != &(*MI.getParent()->getFirstTerminator()) &&
+             "Need to split for the first terminator?!");
+    } else {
+      // For the PHI case, the split may not actually be required.
+      // In the copy case, a phi is already a copy on the incoming edge,
+      // therefore there is no need to split.
+      if (ValMapping.BreakDown.size() == 1)
+        // This is already a copy, there is nothing to do.
+        RepairPt.switchTo(RepairingPlacement::RepairingKind::Reassign);
+    }
+    return;
+  }
+
+  // At this point, we need to repair a definition of a terminator.
+
+  // Technically we need to fix the def of MI on all outgoing
+  // edges of MI to keep the repairing local. In other words, we
+  // will create several definitions of the same register. This
+  // does not work for SSA unless that definition is a physical
+  // register.
+  // However, there are other cases where we can get away with
+  // that while still keeping the repairing local.
+  assert(MI.isTerminator() && MO.isDef() &&
+         "This code is for the def of a terminator");
+
+  // Since we use RPO traversal, if we need to repair a definition
+  // this means this definition could be:
+  // 1. Used by PHIs (i.e., this VReg has been visited as part of the
+  //    uses of a phi.), or
+  // 2. Part of a target specific instruction (i.e., the target applied
+  //    some register class constraints when creating the instruction.)
+  // If the constraints come from #2, the target said that another mapping
+  // is supported so we may just drop them. Indeed, if we do not change
+  // the number of registers holding that value, the uses will get fixed
+  // when we get to them.
+  // Uses in PHIs may already have been processed, though.
+  // If the constraints come from #1, then those are weak constraints and
+  // no actual uses may rely on them. However, the problem remains mainly
+  // the same as for #2. If the value stays in one register, we could
+  // just switch the register bank of the definition, but we would need to
+  // account for a repairing cost for each phi we silently change.
+  //
+  // In any case, if the value needs to be broken down into several
+  // registers, the repairing is not local anymore as we need to patch
+  // every use to rebuild the value in just one register.
+  //
+  // To summarize:
+  // - If the value is in a physical register, we can do the split and
+  //   fix locally.
+  // Otherwise if the value is in a virtual register:
+  // - If the value remains in one register, we do not have to split;
+  //   just switching the register bank would do, but we need to account
+  //   in the repairing cost for all the phis we changed.
+  // - If the value spans several registers, then we cannot do a local
+  //   repairing.
+
+  // Check if this is a physical or virtual register.
+  unsigned Reg = MO.getReg();
+  if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+    // We are going to split every outgoing edge.
+    // Check that this is possible.
+    // FIXME: The machine representation is currently broken
+    // since it also allows several terminators in one basic block.
+    // Because of that we would technically need a way to get
+    // the targets of just one terminator to know which edges
+    // we have to split.
+    // Assert that we do not hit the ill-formed representation.
+
+    // If there are other terminators before that one, some of
+    // the outgoing edges may not be dominated by this definition.
+    assert(&MI == &(*MI.getParent()->getFirstTerminator()) &&
+           "Do not know which outgoing edges are relevant");
+    const MachineInstr *Next = MI.getNextNode();
+    assert((!Next || Next->isUnconditionalBranch()) &&
+           "Do not know where each terminator ends up");
+    if (Next)
+      // If the next terminator uses Reg, this means we have
+      // to split right after MI and thus we need a way to ask
+      // which outgoing edges are affected.
+      assert(!Next->readsRegister(Reg) && "Need to split between terminators");
+    // We will split all the edges and repair there.
+  } else {
+    // This is a virtual register defined by a terminator.
+    if (ValMapping.BreakDown.size() == 1) {
+      // There is nothing to repair, but we may actually lie about
+      // the repairing cost because of the PHIs already processed,
+      // as stated above.
+      // Though the code will be correct.
+      assert(0 && "Repairing cost may not be accurate");
+    } else {
+      // We need to do non-local repairing. Basically, patch all
+      // the uses (i.e., phis) that we already processed.
+      // For now, just say this mapping is not possible.
+ RepairPt.switchTo(RepairingPlacement::RepairingKind::Impossible); + } + } +} + +RegBankSelect::MappingCost RegBankSelect::computeMapping( + MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping, + SmallVectorImpl<RepairingPlacement> &RepairPts, + const RegBankSelect::MappingCost *BestCost) { + assert((MBFI || !BestCost) && "Costs comparison require MBFI"); + + // If mapped with InstrMapping, MI will have the recorded cost. + MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1); + bool Saturated = Cost.addLocalCost(InstrMapping.getCost()); + assert(!Saturated && "Possible mapping saturated the cost"); + DEBUG(dbgs() << "Evaluating mapping cost for: " << MI); + DEBUG(dbgs() << "With: " << InstrMapping << '\n'); + RepairPts.clear(); + if (BestCost && Cost > *BestCost) + return Cost; + + // Moreover, to realize this mapping, the register bank of each operand must + // match this mapping. In other words, we may need to locally reassign the + // register banks. Account for that repairing cost as well. + // In this context, local means in the surrounding of MI. + for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx; + ++OpIdx) { + const MachineOperand &MO = MI.getOperand(OpIdx); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + DEBUG(dbgs() << "Opd" << OpIdx); + const RegisterBankInfo::ValueMapping &ValMapping = + InstrMapping.getOperandMapping(OpIdx); + // If Reg is already properly mapped, this is free. + bool Assign; + if (assignmentMatch(Reg, ValMapping, Assign)) { + DEBUG(dbgs() << " is free (match).\n"); + continue; + } + if (Assign) { + DEBUG(dbgs() << " is free (simple assignment).\n"); + RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this, + RepairingPlacement::Reassign)); + continue; + } + + // Find the insertion point for the repairing code. + RepairPts.emplace_back( + RepairingPlacement(MI, OpIdx, *TRI, *this, RepairingPlacement::Insert)); + RepairingPlacement &RepairPt = RepairPts.back(); + + // If we need to split a basic block to materialize this insertion point, + // we may give a higher cost to this mapping. + // Nevertheless, we may get away with the split, so try that first. + if (RepairPt.hasSplit()) + tryAvoidingSplit(RepairPt, MO, ValMapping); + + // Check that the materialization of the repairing is possible. + if (!RepairPt.canMaterialize()) + return MappingCost::ImpossibleCost(); + + // Account for the split cost and repair cost. + // Unless the cost is already saturated or we do not care about the cost. + if (!BestCost || Saturated) + continue; + + // To get accurate information we need MBFI and MBPI. + // Thus, if we end up here this information should be here. + assert(MBFI && MBPI && "Cost computation requires MBFI and MBPI"); + + // FIXME: We will have to rework the repairing cost model. + // The repairing cost depends on the register bank that MO has. + // However, when we break down the value into different values, + // MO may not have a register bank while still needing repairing. + // For the fast mode, we don't compute the cost so that is fine, + // but still for the repairing code, we will have to make a choice. + // For the greedy mode, we should choose greedily what is the best + // choice based on the next use of MO. + + // Sums up the repairing cost of MO at each insertion point. + uint64_t RepairCost = getRepairCost(MO, ValMapping); + // Bias used for splitting: 5%. 
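The bias computed just below rounds upward, so any non-zero repair cost is biased by at least one unit. A self-contained illustration of that ceiling division; biasFor is a hypothetical helper name, not part of the pass:

#include <cassert>
#include <cstdint>

// Rounds RepairCost * Percent / 100 up to the next integer.
uint64_t biasFor(uint64_t RepairCost, uint64_t Percent) {
  return (RepairCost * Percent + 99) / 100;
}

int main() {
  assert(biasFor(1, 5) == 1);   // plain integer division would give 0
  assert(biasFor(100, 5) == 5);
  assert(biasFor(101, 5) == 6); // 5.05 rounded up
}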
+      const uint64_t PercentageForBias = 5;
+      uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100;
+      // We should not need more than a couple of instructions to repair
+      // an assignment. In other words, the computation should not
+      // overflow because the repairing cost is free of basic block
+      // frequency.
+      assert(((RepairCost < RepairCost * PercentageForBias) &&
+              (RepairCost * PercentageForBias <
+               RepairCost * PercentageForBias + 99)) &&
+             "Repairing involves more than a billion instructions?!");
+      for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
+        assert(InsertPt->canMaterialize() && "We should not have made it here");
+        // We will apply some basic block frequency and those computations
+        // use uint64_t.
+        if (!InsertPt->isSplit())
+          Saturated = Cost.addLocalCost(RepairCost);
+        else {
+          uint64_t CostForInsertPt = RepairCost;
+          // Again we shouldn't overflow here given that
+          // CostForInsertPt is frequency free at this point.
+          assert(CostForInsertPt + Bias > CostForInsertPt &&
+                 "Repairing + split bias overflows");
+          CostForInsertPt += Bias;
+          uint64_t PtCost = InsertPt->frequency(*this) * CostForInsertPt;
+          // Check if we just overflowed.
+          if ((Saturated = PtCost < CostForInsertPt))
+            Cost.saturate();
+          else
+            Saturated = Cost.addNonLocalCost(PtCost);
+        }
+
+        // Stop looking into what it takes to repair; this is already
+        // too expensive.
+        if (BestCost && Cost > *BestCost)
+          return Cost;
+
+        // No need to accumulate more cost information.
+        // We still need to gather the repairing information though.
+        if (Saturated)
+          break;
+      }
+  }
+  return Cost;
+}
+
+void RegBankSelect::applyMapping(
+    MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
+    SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
+  // OpdMapper will hold all the information needed for the rewriting.
+  RegisterBankInfo::OperandsMapper OpdMapper(MI, InstrMapping, *MRI);
+
+  // First, place the repairing code.
+  for (RepairingPlacement &RepairPt : RepairPts) {
+    assert(RepairPt.canMaterialize() &&
+           RepairPt.getKind() != RepairingPlacement::Impossible &&
+           "This mapping is impossible");
+    assert(RepairPt.getKind() != RepairingPlacement::None &&
+           "This should not make its way into the list");
+    unsigned OpIdx = RepairPt.getOpIdx();
+    MachineOperand &MO = MI.getOperand(OpIdx);
+    const RegisterBankInfo::ValueMapping &ValMapping =
+        InstrMapping.getOperandMapping(OpIdx);
+    unsigned BreakDownSize = ValMapping.BreakDown.size();
+    (void)BreakDownSize;
+    unsigned Reg = MO.getReg();
+
+    switch (RepairPt.getKind()) {
+    case RepairingPlacement::Reassign:
+      assert(BreakDownSize == 1 &&
+             "Reassignment should only be for simple mapping");
+      MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank);
+      break;
+    case RepairingPlacement::Insert:
+      OpdMapper.createVRegs(OpIdx);
+      repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx));
+      break;
+    default:
+      llvm_unreachable("Other kinds should not happen");
+    }
+  }
+  // Second, rewrite the instruction.
+  DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
+  RBI->applyMapping(OpdMapper);
+}
+
+void RegBankSelect::assignInstr(MachineInstr &MI) {
+  DEBUG(dbgs() << "Assign: " << MI);
+  // Remember the repairing placement for all the operands.
+  SmallVector<RepairingPlacement, 4> RepairPts;
+
+  RegisterBankInfo::InstructionMapping BestMapping;
+  if (OptMode == RegBankSelect::Mode::Fast) {
+    BestMapping = RBI->getInstrMapping(MI);
+    MappingCost DefaultCost = computeMapping(MI, BestMapping, RepairPts);
+    (void)DefaultCost;
+    assert(DefaultCost != MappingCost::ImpossibleCost() &&
+           "Default mapping is not suited");
+  } else {
+    RegisterBankInfo::InstructionMappings PossibleMappings =
+        RBI->getInstrPossibleMappings(MI);
+    assert(!PossibleMappings.empty() &&
+           "Do not know how to map this instruction");
+    BestMapping = std::move(findBestMapping(MI, PossibleMappings, RepairPts));
+  }
+  // Make sure the mapping is valid for MI.
+  assert(BestMapping.verify(MI) && "Invalid instruction mapping");
+
+  DEBUG(dbgs() << "Mapping: " << BestMapping << '\n');
+
+  // After this call, MI may not be valid anymore.
+  // Do not use it.
+  applyMapping(MI, BestMapping, RepairPts);
+}
+
+bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
+  const Function *F = MF.getFunction();
+  Mode SaveOptMode = OptMode;
+  if (F->hasFnAttribute(Attribute::OptimizeNone))
+    OptMode = Mode::Fast;
+  init(MF);
+  // Walk the function and assign register banks to all operands.
+  // Use a RPOT to make sure all registers are assigned before we choose
+  // the best mapping of the current instruction.
+  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
+  for (MachineBasicBlock *MBB : RPOT) {
+    // Set a sensible insertion point so that subsequent calls to
+    // MIRBuilder insert code at the right place.
+    MIRBuilder.setMBB(*MBB);
+    for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end();
+         MII != End;) {
+      // MI might be invalidated by the assignment, so move the
+      // iterator beforehand.
+      assignInstr(*MII++);
+    }
+  }
+  OptMode = SaveOptMode;
+  return false;
+}
+
+//------------------------------------------------------------------------------
+// Helper Classes Implementation
+//------------------------------------------------------------------------------
+RegBankSelect::RepairingPlacement::RepairingPlacement(
+    MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P,
+    RepairingPlacement::RepairingKind Kind)
+    // Default is, we are going to insert code to repair OpIdx.
+    : Kind(Kind),
+      OpIdx(OpIdx),
+      CanMaterialize(Kind != RepairingKind::Impossible),
+      HasSplit(false),
+      P(P) {
+  const MachineOperand &MO = MI.getOperand(OpIdx);
+  assert(MO.isReg() && "Trying to repair a non-reg operand");
+
+  if (Kind != RepairingKind::Insert)
+    return;
+
+  // Repairings for definitions happen after MI, uses happen before.
+  bool Before = !MO.isDef();
+
+  // Check if we are done with MI.
+  if (!MI.isPHI() && !MI.isTerminator()) {
+    addInsertPoint(MI, Before);
+    // We are done with the initialization.
+    return;
+  }
+
+  // Now, look for the special cases.
+  if (MI.isPHI()) {
+    // - PHIs must be the first instructions:
+    //   * Before, we have to split the related incoming edge.
+    //   * After, move the insertion point past the last phi.
+    if (!Before) {
+      MachineBasicBlock::iterator It = MI.getParent()->getFirstNonPHI();
+      if (It != MI.getParent()->end())
+        addInsertPoint(*It, /*Before*/ true);
+      else
+        addInsertPoint(*(--It), /*Before*/ false);
+      return;
+    }
+    // We repair a use of a phi, we may need to split the related edge.
+    MachineBasicBlock &Pred = *MI.getOperand(OpIdx + 1).getMBB();
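The reverse post-order walk in runOnMachineFunction above guarantees that, on acyclic paths, a block is processed before its successors, so definitions are mapped before their uses are examined. A minimal standalone sketch of computing such an order on a toy CFG; the names are illustrative:

#include <cstdio>
#include <vector>

// Post-order DFS; reversing the result visits every block before its
// successors on acyclic paths.
void postOrder(int N, const std::vector<std::vector<int>> &Succ,
               std::vector<bool> &Seen, std::vector<int> &Out) {
  Seen[N] = true;
  for (int S : Succ[N])
    if (!Seen[S])
      postOrder(S, Succ, Seen, Out);
  Out.push_back(N);
}

int main() {
  // 0 -> 1 -> 3 and 0 -> 2 -> 3: a small diamond CFG.
  std::vector<std::vector<int>> Succ = {{1, 2}, {3}, {3}, {}};
  std::vector<bool> Seen(4, false);
  std::vector<int> PO;
  postOrder(0, Succ, Seen, PO);
  for (auto It = PO.rbegin(); It != PO.rend(); ++It)
    std::printf("%d ", *It); // prints "0 2 1 3", one valid RPO
}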
+    // Check if we can move the insertion point prior to the
+    // terminators of the predecessor.
+    unsigned Reg = MO.getReg();
+    MachineBasicBlock::iterator It = Pred.getLastNonDebugInstr();
+    for (auto Begin = Pred.begin(); It != Begin && It->isTerminator(); --It)
+      if (It->modifiesRegister(Reg, &TRI)) {
+        // We cannot hoist the repairing code in the predecessor.
+        // Split the edge.
+        addInsertPoint(Pred, *MI.getParent());
+        return;
+      }
+    // At this point, we can insert in Pred.
+
+    // - If It is invalid, Pred is empty and we can insert in Pred
+    //   wherever we want.
+    // - If It is valid, It is the first non-terminator; insert after It.
+    if (It == Pred.end())
+      addInsertPoint(Pred, /*Beginning*/ false);
+    else
+      addInsertPoint(*It, /*Before*/ false);
+  } else {
+    // - Terminators must be the last instructions:
+    //   * Before, move the insert point before the first terminator.
+    //   * After, we have to split the outgoing edges.
+    unsigned Reg = MO.getReg();
+    if (Before) {
+      // Check whether Reg is defined by any terminator.
+      MachineBasicBlock::iterator It = MI;
+      for (auto Begin = MI.getParent()->begin();
+           --It != Begin && It->isTerminator();)
+        if (It->modifiesRegister(Reg, &TRI)) {
+          // Insert the repairing code right after the definition.
+          addInsertPoint(*It, /*Before*/ false);
+          return;
+        }
+      addInsertPoint(*It, /*Before*/ true);
+      return;
+    }
+    // Make sure Reg is not redefined by other terminators, otherwise
+    // we do not know how to split.
+    for (MachineBasicBlock::iterator It = MI, End = MI.getParent()->end();
+         ++It != End;)
+      // The machine verifier should reject this kind of code.
+      assert(!It->modifiesRegister(Reg, &TRI) && "Do not know where to split");
+    // Split each outgoing edge.
+    MachineBasicBlock &Src = *MI.getParent();
+    for (auto &Succ : Src.successors())
+      addInsertPoint(Src, Succ);
+  }
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineInstr &MI,
+                                                       bool Before) {
+  addInsertPoint(*new InstrInsertPoint(MI, Before));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &MBB,
+                                                       bool Beginning) {
+  addInsertPoint(*new MBBInsertPoint(MBB, Beginning));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &Src,
+                                                       MachineBasicBlock &Dst) {
+  addInsertPoint(*new EdgeInsertPoint(Src, Dst, P));
+}
+
+void RegBankSelect::RepairingPlacement::addInsertPoint(
+    RegBankSelect::InsertPoint &Point) {
+  CanMaterialize &= Point.canMaterialize();
+  HasSplit |= Point.isSplit();
+  InsertPoints.emplace_back(&Point);
+}
+
+RegBankSelect::InstrInsertPoint::InstrInsertPoint(MachineInstr &Instr,
+                                                  bool Before)
+    : InsertPoint(), Instr(Instr), Before(Before) {
+  // Since we do not support splitting, we do not need to update
+  // liveness and such, so do not do anything with P.
+  assert((!Before || !Instr.isPHI()) &&
+         "Splitting before phis requires more points");
+  assert((!Before || !Instr.getNextNode() || !Instr.getNextNode()->isPHI()) &&
+         "Splitting between phis does not make sense");
+}
+
+void RegBankSelect::InstrInsertPoint::materialize() {
+  if (isSplit()) {
+    // Slice and return the beginning of the new block.
+    // If we need to split between the terminators, we theoretically
+    // need to know where the first and second set of terminators end
+    // to update the successors properly.
+    // Now, in practice, we should have a maximum of 2 branch
+    // instructions; one conditional and one unconditional. Therefore
+    // we know how to update the successor by looking at the target of
+    // the unconditional branch.
+    // If we end up splitting at some point, then we should update
+    // the liveness information and such. I.e., we would need to
+    // access P here.
+    // The machine verifier should actually make sure such cases
+    // cannot happen.
+    llvm_unreachable("Not yet implemented");
+  }
+  // Otherwise the insertion point is just the current or next
+  // instruction depending on Before. I.e., there is nothing to do
+  // here.
+}
+
+bool RegBankSelect::InstrInsertPoint::isSplit() const {
+  // If the insertion point is after a terminator, we need to split.
+  if (!Before)
+    return Instr.isTerminator();
+  // If we insert before an instruction that is after a terminator,
+  // we are still after a terminator.
+  return Instr.getPrevNode() && Instr.getPrevNode()->isTerminator();
+}
+
+uint64_t RegBankSelect::InstrInsertPoint::frequency(const Pass &P) const {
+  // Even if we need to split, because we insert between terminators,
+  // this split has actually the same frequency as the instruction.
+  const MachineBlockFrequencyInfo *MBFI =
+      P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+  if (!MBFI)
+    return 1;
+  return MBFI->getBlockFreq(Instr.getParent()).getFrequency();
+}
+
+uint64_t RegBankSelect::MBBInsertPoint::frequency(const Pass &P) const {
+  const MachineBlockFrequencyInfo *MBFI =
+      P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+  if (!MBFI)
+    return 1;
+  return MBFI->getBlockFreq(&MBB).getFrequency();
+}
+
+void RegBankSelect::EdgeInsertPoint::materialize() {
+  // If we end up repairing twice at the same place before materializing the
+  // insertion point, we may think we have to split an edge twice.
+  // We should have a factory for the insert point such that identical points
+  // are the same instance.
+  assert(Src.isSuccessor(DstOrSplit) && DstOrSplit->isPredecessor(&Src) &&
+         "This point has already been split");
+  MachineBasicBlock *NewBB = Src.SplitCriticalEdge(DstOrSplit, P);
+  assert(NewBB && "Invalid call to materialize");
+  // We reuse the destination block to hold the information of the new block.
+  DstOrSplit = NewBB;
+}
+
+uint64_t RegBankSelect::EdgeInsertPoint::frequency(const Pass &P) const {
+  const MachineBlockFrequencyInfo *MBFI =
+      P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+  if (!MBFI)
+    return 1;
+  if (WasMaterialized)
+    return MBFI->getBlockFreq(DstOrSplit).getFrequency();
+
+  const MachineBranchProbabilityInfo *MBPI =
+      P.getAnalysisIfAvailable<MachineBranchProbabilityInfo>();
+  if (!MBPI)
+    return 1;
+  // The basic block will be on the edge.
+  return (MBFI->getBlockFreq(&Src) * MBPI->getEdgeProbability(&Src, DstOrSplit))
+      .getFrequency();
+}
+
+bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
+  // If this is not a critical edge, we should not have used this insert
+  // point. Indeed, either the successor or the predecessor would have
+  // been a valid insertion point.
+  assert(Src.succ_size() > 1 && DstOrSplit->pred_size() > 1 &&
+         "Edge is not critical");
+  return Src.canSplitCriticalEdge(DstOrSplit);
+}
+
+RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
+    : LocalCost(0), NonLocalCost(0), LocalFreq(LocalFreq.getFrequency()) {}
+
+bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
+  // Check if this overflows.
+  if (LocalCost + Cost < LocalCost) {
+    saturate();
+    return true;
+  }
+  LocalCost += Cost;
+  return isSaturated();
+}
+
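The overflow test in addLocalCost above, and in addNonLocalCost just below, relies on unsigned arithmetic wrapping being well defined in C++: if A + B wraps, the sum is smaller than A. A self-contained sketch of that saturating-add idiom; saturatingAdd is a hypothetical helper name:

#include <cassert>
#include <cstdint>

// If the addition wraps around, clamp to the maximum instead.
uint64_t saturatingAdd(uint64_t A, uint64_t B) {
  uint64_t Sum = A + B;
  return Sum < A ? UINT64_MAX : Sum;
}

int main() {
  assert(saturatingAdd(1, 2) == 3);
  assert(saturatingAdd(UINT64_MAX, 1) == UINT64_MAX);
  assert(saturatingAdd(UINT64_MAX - 1, 5) == UINT64_MAX);
}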
+bool RegBankSelect::MappingCost::addNonLocalCost(uint64_t Cost) {
+  // Check if this overflows.
+  if (NonLocalCost + Cost < NonLocalCost) {
+    saturate();
+    return true;
+  }
+  NonLocalCost += Cost;
+  return isSaturated();
+}
+
+bool RegBankSelect::MappingCost::isSaturated() const {
+  return LocalCost == UINT64_MAX - 1 && NonLocalCost == UINT64_MAX &&
+         LocalFreq == UINT64_MAX;
+}
+
+void RegBankSelect::MappingCost::saturate() {
+  *this = ImpossibleCost();
+  --LocalCost;
+}
+
+RegBankSelect::MappingCost RegBankSelect::MappingCost::ImpossibleCost() {
+  return MappingCost(UINT64_MAX, UINT64_MAX, UINT64_MAX);
+}
+
+bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const {
+  // Sort out the easy cases.
+  if (*this == Cost)
+    return false;
+  // If one is impossible to realize, the other is cheaper, unless it is
+  // impossible as well.
+  if ((*this == ImpossibleCost()) || (Cost == ImpossibleCost()))
+    return (*this == ImpossibleCost()) < (Cost == ImpossibleCost());
+  // If one is saturated, the other is cheaper, unless it is saturated
+  // as well.
+  if (isSaturated() || Cost.isSaturated())
+    return isSaturated() < Cost.isSaturated();
+  // At this point we know both costs hold sensible values.
+
+  // If the two values have different base frequencies, there is not much
+  // we can do but scale everything.
+  // However, if they have the same base frequency we can avoid making
+  // complicated computations.
+  uint64_t ThisLocalAdjust;
+  uint64_t OtherLocalAdjust;
+  if (LLVM_LIKELY(LocalFreq == Cost.LocalFreq)) {
+
+    // At this point, we know the local costs are comparable.
+    // Handle the case that does not involve potential overflow first.
+    if (NonLocalCost == Cost.NonLocalCost)
+      // Since the non-local costs do not discriminate on the result,
+      // just compare the local costs.
+      return LocalCost < Cost.LocalCost;
+
+    // The base costs are comparable so we may only keep the relative
+    // value to increase our chances of avoiding overflows.
+    ThisLocalAdjust = 0;
+    OtherLocalAdjust = 0;
+    if (LocalCost < Cost.LocalCost)
+      OtherLocalAdjust = Cost.LocalCost - LocalCost;
+    else
+      ThisLocalAdjust = LocalCost - Cost.LocalCost;
+
+  } else {
+    ThisLocalAdjust = LocalCost;
+    OtherLocalAdjust = Cost.LocalCost;
+  }
+
+  // The non-local costs are comparable, just keep the relative value.
+  uint64_t ThisNonLocalAdjust = 0;
+  uint64_t OtherNonLocalAdjust = 0;
+  if (NonLocalCost < Cost.NonLocalCost)
+    OtherNonLocalAdjust = Cost.NonLocalCost - NonLocalCost;
+  else
+    ThisNonLocalAdjust = NonLocalCost - Cost.NonLocalCost;
+  // Scale everything to make them comparable.
+  uint64_t ThisScaledCost = ThisLocalAdjust * LocalFreq;
+  // Check for overflow on that operation.
+  bool ThisOverflows = ThisLocalAdjust && (ThisScaledCost < ThisLocalAdjust ||
+                                           ThisScaledCost < LocalFreq);
+  uint64_t OtherScaledCost = OtherLocalAdjust * Cost.LocalFreq;
+  // Check for overflow on the last operation.
+  bool OtherOverflows =
+      OtherLocalAdjust &&
+      (OtherScaledCost < OtherLocalAdjust || OtherScaledCost < Cost.LocalFreq);
+  // Add the non-local costs.
+  ThisOverflows |= ThisNonLocalAdjust &&
+                   ThisScaledCost + ThisNonLocalAdjust < ThisNonLocalAdjust;
+  ThisScaledCost += ThisNonLocalAdjust;
+  OtherOverflows |= OtherNonLocalAdjust &&
+                    OtherScaledCost + OtherNonLocalAdjust < OtherNonLocalAdjust;
+  OtherScaledCost += OtherNonLocalAdjust;
+  // If both overflow, we cannot compare without additional
+  // precision, e.g., APInt. Just give up on that case.
+  if (ThisOverflows && OtherOverflows)
+    return false;
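The code above avoids scaling the full costs by first canceling the common part and then multiplying only the differences by the frequency; the final comparison follows below. A standalone sketch of that idea, ignoring the overflow handling the real code performs; the names are illustrative:

#include <cassert>
#include <cstdint>

// Compare L1*F + N1 against L2*F + N2 by scaling only the deltas.
bool cheaperThan(uint64_t L1, uint64_t N1, uint64_t L2, uint64_t N2,
                 uint64_t F) {
  uint64_t DL1 = L1 > L2 ? L1 - L2 : 0; // local-cost deltas
  uint64_t DL2 = L2 > L1 ? L2 - L1 : 0;
  uint64_t DN1 = N1 > N2 ? N1 - N2 : 0; // non-local deltas
  uint64_t DN2 = N2 > N1 ? N2 - N1 : 0;
  return DL1 * F + DN1 < DL2 * F + DN2;
}

int main() {
  // Same local cost: only the non-local part decides.
  assert(cheaperThan(7, 3, 7, 9, 1000));
  // A smaller local cost wins once scaled by the frequency.
  assert(cheaperThan(1, 500, 2, 0, 1000));
}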
+  // If one overflows but not the other, we can still compare.
+  if (ThisOverflows || OtherOverflows)
+    return ThisOverflows < OtherOverflows;
+  // Otherwise, just compare the values.
+  return ThisScaledCost < OtherScaledCost;
+}
+
+bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const {
+  return LocalCost == Cost.LocalCost && NonLocalCost == Cost.NonLocalCost &&
+         LocalFreq == Cost.LocalFreq;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
new file mode 100644
index 0000000..a911225
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -0,0 +1,107 @@
+//===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegisterBank class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "registerbank"
+
+using namespace llvm;
+
+const unsigned RegisterBank::InvalidID = UINT_MAX;
+
+RegisterBank::RegisterBank() : ID(InvalidID), Name(nullptr), Size(0) {}
+
+bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
+  assert(isValid() && "Invalid register bank");
+  assert(ContainedRegClasses.size() == TRI.getNumRegClasses() &&
+         "TRI does not match the initialization process?");
+  for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) {
+    const TargetRegisterClass &RC = *TRI.getRegClass(RCId);
+
+    if (!covers(RC))
+      continue;
+    // Verify that the register bank covers all the subclasses of the
+    // classes it covers.
+
+    // Use a different (slow in that case) method than
+    // RegisterBankInfo to find the subclasses of RC, to make sure
+    // both agree on the covers.
+    for (unsigned SubRCId = 0; SubRCId != End; ++SubRCId) {
+      const TargetRegisterClass &SubRC = *TRI.getRegClass(SubRCId);
+
+      if (!RC.hasSubClassEq(&SubRC))
+        continue;
+
+      // Verify that the Size of the register bank is big enough to cover
+      // all the register classes it covers.
+      assert((getSize() >= SubRC.getSize() * 8) &&
+             "Size is not big enough for all the subclasses!");
+      assert(covers(SubRC) && "Not all subclasses are covered");
+    }
+  }
+  return true;
+}
+
+bool RegisterBank::covers(const TargetRegisterClass &RC) const {
+  assert(isValid() && "RB hasn't been initialized yet");
+  return ContainedRegClasses.test(RC.getID());
+}
+
+bool RegisterBank::isValid() const {
+  return ID != InvalidID && Name != nullptr && Size != 0 &&
+         // A register bank that does not cover anything is useless.
+         !ContainedRegClasses.empty();
+}
+
+bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
+  // There must be only one instance of a given register bank alive
+  // for the whole compilation.
+  // The RegisterBankInfo is supposed to enforce that.
+  assert((OtherRB.getID() != getID() || &OtherRB == this) &&
+         "ID does not uniquely identify a RegisterBank");
+  return &OtherRB == this;
+}
+
+void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
+  print(dbgs(), /* IsForDebug */ true, TRI);
+}
+
+void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
+                         const TargetRegisterInfo *TRI) const {
+  OS << getName();
+  if (!IsForDebug)
+    return;
+  OS << "(ID:" << getID() << ", Size:" << getSize() << ")\n"
+     << "isValid:" << isValid() << '\n'
+     << "Number of Covered register classes: " << ContainedRegClasses.count()
+     << '\n';
+  // Print all the subclasses if we can.
+  // These register classes may not be properly initialized yet.
+  if (!TRI || ContainedRegClasses.empty())
+    return;
+  assert(ContainedRegClasses.size() == TRI->getNumRegClasses() &&
+         "TRI does not match the initialization process?");
+  bool IsFirst = true;
+  OS << "Covered register classes:\n";
+  for (unsigned RCId = 0, End = TRI->getNumRegClasses(); RCId != End; ++RCId) {
+    const TargetRegisterClass &RC = *TRI->getRegClass(RCId);
+
+    if (!covers(RC))
+      continue;
+
+    if (!IsFirst)
+      OS << ", ";
+    OS << TRI->getRegClassName(&RC);
+    IsFirst = false;
+  }
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
new file mode 100644
index 0000000..ef8e4f6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -0,0 +1,663 @@
+//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the RegisterBankInfo class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#include <algorithm> // For std::max.
+
+#define DEBUG_TYPE "registerbankinfo"
+
+using namespace llvm;
+
+const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX;
+const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
+
+//------------------------------------------------------------------------------
+// RegisterBankInfo implementation.
+//------------------------------------------------------------------------------
+RegisterBankInfo::RegisterBankInfo(unsigned NumRegBanks)
+    : NumRegBanks(NumRegBanks) {
+  RegBanks.reset(new RegisterBank[NumRegBanks]);
+}
+
+bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
+  DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+    const RegisterBank &RegBank = getRegBank(Idx);
+    assert(Idx == RegBank.getID() &&
+           "ID does not match the index in the array");
+    dbgs() << "Verify " << RegBank << '\n';
+    assert(RegBank.verify(TRI) && "RegBank is invalid");
+  });
+  return true;
+}
+
+void RegisterBankInfo::createRegisterBank(unsigned ID, const char *Name) {
+  DEBUG(dbgs() << "Create register bank: " << ID << " with name \"" << Name
+               << "\"\n");
+  RegisterBank &RegBank = getRegBank(ID);
+  assert(RegBank.getID() == RegisterBank::InvalidID &&
+         "A register bank should be created only once");
+  RegBank.ID = ID;
+  RegBank.Name = Name;
+}
+
+void RegisterBankInfo::addRegBankCoverage(unsigned ID, unsigned RCId,
+                                          const TargetRegisterInfo &TRI,
+                                          bool AddTypeMapping) {
+  RegisterBank &RB = getRegBank(ID);
+  unsigned NbOfRegClasses = TRI.getNumRegClasses();
+
+  DEBUG(dbgs() << "Add coverage for: " << RB << '\n');
+
+  // Check if RB is under construction.
+  if (!RB.isValid())
+    RB.ContainedRegClasses.resize(NbOfRegClasses);
+  else if (RB.covers(*TRI.getRegClass(RCId)))
+    // If RB already covers this register class, there is nothing
+    // to do.
+    return;
+
+  BitVector &Covered = RB.ContainedRegClasses;
+  SmallVector<unsigned, 8> WorkList;
+
+  WorkList.push_back(RCId);
+  Covered.set(RCId);
+
+  unsigned &MaxSize = RB.Size;
+  do {
+    unsigned RCId = WorkList.pop_back_val();
+
+    const TargetRegisterClass &CurRC = *TRI.getRegClass(RCId);
+
+    DEBUG(dbgs() << "Examine: " << TRI.getRegClassName(&CurRC)
+                 << "(Size*8: " << (CurRC.getSize() * 8) << ")\n");
+
+    // Remember the biggest size in bits.
+    MaxSize = std::max(MaxSize, CurRC.getSize() * 8);
+
+    // If we have been asked to record the type supported by this
+    // register bank, do it now.
+    if (AddTypeMapping)
+      for (MVT::SimpleValueType SVT :
+           make_range(CurRC.vt_begin(), CurRC.vt_end()))
+        recordRegBankForType(getRegBank(ID), SVT);
+
+    // Walk through all sub register classes and push them into the worklist.
+    bool First = true;
+    for (BitMaskClassIterator It(CurRC.getSubClassMask(), TRI); It.isValid();
+         ++It) {
+      unsigned SubRCId = It.getID();
+      if (!Covered.test(SubRCId)) {
+        if (First)
+          DEBUG(dbgs() << "  Enqueue sub-class: ");
+        DEBUG(dbgs() << TRI.getRegClassName(TRI.getRegClass(SubRCId)) << ", ");
+        WorkList.push_back(SubRCId);
+        // Remember that we saw the sub class.
+        Covered.set(SubRCId);
+        First = false;
+      }
+    }
+    if (!First)
+      DEBUG(dbgs() << '\n');
+
+    // Push also all the register classes that can be accessed via a
+    // subreg index, i.e., its subreg-class (which is different from
+    // its subclass).
+    //
+    // Note: It would probably be faster to go the other way around
+    // and have this method add only super classes, since this
+    // information is available in a more efficient way. However, it
+    // feels less natural for the clients of this API, plus we will
+    // TableGen the whole bitset at some point, so compile time for
+    // the initialization is not very important.
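The do-loop above is a transitive closure over the subclass relation: a worklist seeded with the root class, and a bit set that both records coverage and prevents re-enqueueing. The same shape on toy data, with std::bitset standing in for BitVector and illustrative names:

#include <bitset>
#include <cassert>
#include <vector>

std::bitset<8> coverFrom(unsigned Root,
                         const std::vector<std::vector<unsigned>> &Subs) {
  std::bitset<8> Covered;
  std::vector<unsigned> WorkList{Root};
  Covered.set(Root);
  do {
    unsigned Id = WorkList.back();
    WorkList.pop_back();
    for (unsigned Sub : Subs[Id])
      if (!Covered.test(Sub)) { // enqueue each class only once
        Covered.set(Sub);
        WorkList.push_back(Sub);
      }
  } while (!WorkList.empty());
  return Covered;
}

int main() {
  // Class 0 has subclasses 1 and 2; class 1 has subclass 3.
  std::vector<std::vector<unsigned>> Subs = {{1, 2}, {3}, {}, {}};
  assert(coverFrom(0, Subs) == std::bitset<8>("1111"));
}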
+ First = true; + for (unsigned SubRCId = 0; SubRCId < NbOfRegClasses; ++SubRCId) { + if (Covered.test(SubRCId)) + continue; + bool Pushed = false; + const TargetRegisterClass *SubRC = TRI.getRegClass(SubRCId); + for (SuperRegClassIterator SuperRCIt(SubRC, &TRI); SuperRCIt.isValid(); + ++SuperRCIt) { + if (Pushed) + break; + for (BitMaskClassIterator It(SuperRCIt.getMask(), TRI); It.isValid(); + ++It) { + unsigned SuperRCId = It.getID(); + if (SuperRCId == RCId) { + if (First) + DEBUG(dbgs() << " Enqueue subreg-class: "); + DEBUG(dbgs() << TRI.getRegClassName(SubRC) << ", "); + WorkList.push_back(SubRCId); + // Remember that we saw the sub class. + Covered.set(SubRCId); + Pushed = true; + First = false; + break; + } + } + } + } + if (!First) + DEBUG(dbgs() << '\n'); + } while (!WorkList.empty()); +} + +const RegisterBank * +RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return &getRegBankFromRegClass(*TRI.getMinimalPhysRegClass(Reg)); + + assert(Reg && "NoRegister does not have a register bank"); + const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); + if (RegClassOrBank.is<const RegisterBank *>()) + return RegClassOrBank.get<const RegisterBank *>(); + const TargetRegisterClass *RC = + RegClassOrBank.get<const TargetRegisterClass *>(); + if (RC) + return &getRegBankFromRegClass(*RC); + return nullptr; +} + +const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( + const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI) const { + // The mapping of the registers may be available via the + // register class constraints. + const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, &TRI); + + if (!RC) + return nullptr; + + const RegisterBank &RegBank = getRegBankFromRegClass(*RC); + // Sanity check that the target properly implemented getRegBankFromRegClass. + assert(RegBank.covers(*RC) && + "The mapping of the register bank does not make sense"); + return &RegBank; +} + +RegisterBankInfo::InstructionMapping +RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { + RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1, + MI.getNumOperands()); + const MachineFunction &MF = *MI.getParent()->getParent(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + // We may need to query the instruction encoding to guess the mapping. + const TargetInstrInfo &TII = *STI.getInstrInfo(); + + // Before doing anything complicated check if the mapping is not + // directly available. + bool CompleteMapping = true; + // For copies we want to walk over the operands and try to find one + // that has a register bank. + bool isCopyLike = MI.isCopy() || MI.isPHI(); + // Remember the register bank for reuse for copy-like instructions. + const RegisterBank *RegBank = nullptr; + // Remember the size of the register for reuse for copy-like instructions. 
+  unsigned RegSize = 0;
+  for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) {
+    const MachineOperand &MO = MI.getOperand(OpIdx);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    // The register bank of Reg is just a side effect of the current
+    // execution and in particular, there is no reason to believe this
+    // is the best default mapping for the current instruction. Keep
+    // it as an alternative register bank if we cannot figure out
+    // something.
+    const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI);
+    // For copy-like instructions, we want to reuse the register bank
+    // that is already set on Reg, if any, since those instructions do
+    // not have any constraints.
+    const RegisterBank *CurRegBank = isCopyLike ? AltRegBank : nullptr;
+    if (!CurRegBank) {
+      // If this is a target specific instruction, we can deduce
+      // the register bank from the encoding constraints.
+      CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI);
+      if (!CurRegBank) {
+        // Check if we can deduce the register bank from the type of
+        // the instruction.
+        Type *MITy = MI.getType();
+        if (MITy)
+          CurRegBank = getRegBankForType(
+              MVT::getVT(MITy, /*HandleUnknown*/ true).SimpleTy);
+        if (!CurRegBank)
+          // Use the current assigned register bank.
+          // That may not make much sense though.
+          CurRegBank = AltRegBank;
+        if (!CurRegBank) {
+          // All our attempts failed, give up.
+          CompleteMapping = false;
+
+          if (!isCopyLike)
+            // MI does not carry enough information to guess the mapping.
+            return InstructionMapping();
+
+          // For copies, we want to keep iterating to find a register
+          // bank for the other operands if we did not find one yet.
+          if (RegBank)
+            break;
+          continue;
+        }
+      }
+    }
+    RegBank = CurRegBank;
+    RegSize = getSizeInBits(Reg, MRI, TRI);
+    Mapping.setOperandMapping(OpIdx, RegSize, *CurRegBank);
+  }
+
+  if (CompleteMapping)
+    return Mapping;
+
+  assert(isCopyLike && "We should have bailed on non-copies at this point");
+  // For copy-like instructions, if none of the operands has a register
+  // bank available, there is nothing we can propagate.
+  if (!RegBank)
+    return InstructionMapping();
+
+  // This is a copy-like instruction.
+  // Propagate RegBank to all operands that do not have a
+  // mapping yet.
+  for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) {
+    const MachineOperand &MO = MI.getOperand(OpIdx);
+    // Don't assign a mapping for non-reg operands.
+    if (!MO.isReg())
+      continue;
+
+    // If a mapping already exists, do not touch it.
+    if (!static_cast<const InstructionMapping *>(&Mapping)
+             ->getOperandMapping(OpIdx)
+             .BreakDown.empty())
+      continue;
+
+    Mapping.setOperandMapping(OpIdx, RegSize, *RegBank);
+  }
+  return Mapping;
+}
+
+RegisterBankInfo::InstructionMapping
+RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+  RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+  if (Mapping.isValid())
+    return Mapping;
+  llvm_unreachable("The target must implement this");
+}
+
+RegisterBankInfo::InstructionMappings
+RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
+  InstructionMappings PossibleMappings;
+  // Put the default mapping first.
+  PossibleMappings.push_back(getInstrMapping(MI));
+  // Then the alternative mappings, if any.
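For the copy-like case handled in getInstrMappingImpl above, the recovery strategy boils down to: find any operand that already has a bank, then propagate it to the operands that have none. A standalone sketch with an int standing in for the bank and -1 for "no bank yet"; the names are illustrative:

#include <cassert>
#include <vector>

// Returns false when no operand has a bank to propagate from.
bool propagateBank(std::vector<int> &OperandBanks) {
  int Known = -1;
  for (int B : OperandBanks)
    if (B != -1) {
      Known = B;
      break;
    }
  if (Known == -1)
    return false; // nothing to propagate from
  for (int &B : OperandBanks)
    if (B == -1)
      B = Known;
  return true;
}

int main() {
  std::vector<int> Banks = {-1, 2, -1};
  assert(propagateBank(Banks) && Banks[0] == 2 && Banks[2] == 2);
}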
+ InstructionMappings AltMappings = getInstrAlternativeMappings(MI); + for (InstructionMapping &AltMapping : AltMappings) + PossibleMappings.emplace_back(std::move(AltMapping)); +#ifndef NDEBUG + for (const InstructionMapping &Mapping : PossibleMappings) + assert(Mapping.verify(MI) && "Mapping is invalid"); +#endif + return PossibleMappings; +} + +RegisterBankInfo::InstructionMappings +RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const { + // No alternative for MI. + return InstructionMappings(); +} + +void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { + MachineInstr &MI = OpdMapper.getMI(); + DEBUG(dbgs() << "Applying default-like mapping\n"); + for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx; + ++OpIdx) { + DEBUG(dbgs() << "OpIdx " << OpIdx); + MachineOperand &MO = MI.getOperand(OpIdx); + if (!MO.isReg()) { + DEBUG(dbgs() << " is not a register, nothing to be done\n"); + continue; + } + assert( + OpdMapper.getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() == + 1 && + "This mapping is too complex for this function"); + iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs = + OpdMapper.getVRegs(OpIdx); + if (NewRegs.begin() == NewRegs.end()) { + DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); + continue; + } + DEBUG(dbgs() << " changed, replace " << MO.getReg()); + MO.setReg(*NewRegs.begin()); + DEBUG(dbgs() << " with " << MO.getReg()); + } +} + +unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) { + const TargetRegisterClass *RC = nullptr; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // The size is not directly available for physical registers. + // Instead, we need to access a register class that contains Reg and + // get the size of that register class. + RC = TRI.getMinimalPhysRegClass(Reg); + } else { + unsigned RegSize = MRI.getSize(Reg); + // If Reg is not a generic register, query the register class to + // get its size. + if (RegSize) + return RegSize; + // Since Reg is not a generic register, it must have a register class. + RC = MRI.getRegClass(Reg); + } + assert(RC && "Unable to deduce the register class"); + return RC->getSize() * 8; +} + +//------------------------------------------------------------------------------ +// Helper classes implementation. +//------------------------------------------------------------------------------ +void RegisterBankInfo::PartialMapping::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + +bool RegisterBankInfo::PartialMapping::verify() const { + assert(RegBank && "Register bank not set"); + assert(Length && "Empty mapping"); + assert((StartIdx < getHighBitIdx()) && "Overflow, switch to APInt?"); + // Check if the minimum width fits into RegBank. 
+ assert(RegBank->getSize() >= Length && "Register bank too small for Mask"); + return true; +} + +void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const { + OS << "[" << StartIdx << ", " << getHighBitIdx() << "], RegBank = "; + if (RegBank) + OS << *RegBank; + else + OS << "nullptr"; +} + +bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const { + assert(!BreakDown.empty() && "Value mapped nowhere?!"); + unsigned OrigValueBitWidth = 0; + for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) { + // Check that each register bank is big enough to hold the partial value: + // this check is done by PartialMapping::verify + assert(PartMap.verify() && "Partial mapping is invalid"); + // The original value should completely be mapped. + // Thus the maximum accessed index + 1 is the size of the original value. + OrigValueBitWidth = + std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1); + } + assert(OrigValueBitWidth == ExpectedBitWidth && "BitWidth does not match"); + APInt ValueMask(OrigValueBitWidth, 0); + for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) { + // Check that the union of the partial mappings covers the whole value, + // without overlaps. + // The high bit is exclusive in the APInt API, thus getHighBitIdx + 1. + APInt PartMapMask = APInt::getBitsSet(OrigValueBitWidth, PartMap.StartIdx, + PartMap.getHighBitIdx() + 1); + ValueMask ^= PartMapMask; + assert((ValueMask & PartMapMask) == PartMapMask && + "Some partial mappings overlap"); + } + assert(ValueMask.isAllOnesValue() && "Value is not fully mapped"); + return true; +} + +void RegisterBankInfo::ValueMapping::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + +void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const { + OS << "#BreakDown: " << BreakDown.size() << " "; + bool IsFirst = true; + for (const PartialMapping &PartMap : BreakDown) { + if (!IsFirst) + OS << ", "; + OS << '[' << PartMap << ']'; + IsFirst = false; + } +} + +void RegisterBankInfo::InstructionMapping::setOperandMapping( + unsigned OpIdx, unsigned MaskSize, const RegisterBank &RegBank) { + // Build the value mapping. + assert(MaskSize <= RegBank.getSize() && "Register bank is too small"); + + // Create the mapping object. + getOperandMapping(OpIdx).BreakDown.push_back( + PartialMapping(0, MaskSize, RegBank)); +} + +bool RegisterBankInfo::InstructionMapping::verify( + const MachineInstr &MI) const { + // Check that all the register operands are properly mapped. + // Check the constructor invariant. + assert(NumOperands == MI.getNumOperands() && + "NumOperands must match, see constructor"); + assert(MI.getParent() && MI.getParent()->getParent() && + "MI must be connected to a MachineFunction"); + const MachineFunction &MF = *MI.getParent()->getParent(); + (void)MF; + + for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { + const MachineOperand &MO = MI.getOperand(Idx); + const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx); + (void)MOMapping; + if (!MO.isReg()) { + assert(MOMapping.BreakDown.empty() && + "We should not care about non-reg mapping"); + continue; + } + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + // Register size in bits. + // This size must match what the mapping expects. 
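ValueMapping::verify above checks that the partial mappings tile the original value exactly by XOR-accumulating their bit masks: an overlap flips bits back to zero and breaks the containment test, and a hole leaves the final mask short of all-ones. The same trick on plain uint64_t masks; the names are illustrative:

#include <cassert>
#include <cstdint>

bool piecesCoverExactly(const uint64_t *Masks, unsigned N, uint64_t Full) {
  uint64_t Acc = 0;
  for (unsigned I = 0; I != N; ++I) {
    Acc ^= Masks[I];
    if ((Acc & Masks[I]) != Masks[I])
      return false; // Masks[I] overlaps a previous piece
  }
  return Acc == Full; // and there are no holes either
}

int main() {
  uint64_t Ok[] = {0x0Fu, 0xF0u};      // low half + high half of a byte
  uint64_t Overlap[] = {0x0Fu, 0x1Fu}; // second piece re-covers bits 0..3
  assert(piecesCoverExactly(Ok, 2, 0xFFu));
  assert(!piecesCoverExactly(Overlap, 2, 0xFFu));
}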
+ assert(MOMapping.verify(getSizeInBits( + Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) && + "Value mapping is invalid"); + } + return true; +} + +void RegisterBankInfo::InstructionMapping::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + +void RegisterBankInfo::InstructionMapping::print(raw_ostream &OS) const { + OS << "ID: " << getID() << " Cost: " << getCost() << " Mapping: "; + + for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { + const ValueMapping &ValMapping = getOperandMapping(OpIdx); + if (OpIdx) + OS << ", "; + OS << "{ Idx: " << OpIdx << " Map: " << ValMapping << '}'; + } +} + +const int RegisterBankInfo::OperandsMapper::DontKnowIdx = -1; + +RegisterBankInfo::OperandsMapper::OperandsMapper( + MachineInstr &MI, const InstructionMapping &InstrMapping, + MachineRegisterInfo &MRI) + : MRI(MRI), MI(MI), InstrMapping(InstrMapping) { + unsigned NumOpds = MI.getNumOperands(); + OpToNewVRegIdx.reset(new int[NumOpds]); + std::fill(&OpToNewVRegIdx[0], &OpToNewVRegIdx[NumOpds], + OperandsMapper::DontKnowIdx); + assert(InstrMapping.verify(MI) && "Invalid mapping for MI"); +} + +iterator_range<SmallVectorImpl<unsigned>::iterator> +RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) { + assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); + unsigned NumPartialVal = + getInstrMapping().getOperandMapping(OpIdx).BreakDown.size(); + int StartIdx = OpToNewVRegIdx[OpIdx]; + + if (StartIdx == OperandsMapper::DontKnowIdx) { + // This is the first time we try to access OpIdx. + // Create the cells that will hold all the partial values at the + // end of the list of NewVReg. + StartIdx = NewVRegs.size(); + OpToNewVRegIdx[OpIdx] = StartIdx; + for (unsigned i = 0; i < NumPartialVal; ++i) + NewVRegs.push_back(0); + } + SmallVectorImpl<unsigned>::iterator End = + getNewVRegsEnd(StartIdx, NumPartialVal); + + return make_range(&NewVRegs[StartIdx], End); +} + +SmallVectorImpl<unsigned>::const_iterator +RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, + unsigned NumVal) const { + return const_cast<OperandsMapper *>(this)->getNewVRegsEnd(StartIdx, NumVal); +} +SmallVectorImpl<unsigned>::iterator +RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx, + unsigned NumVal) { + assert((NewVRegs.size() == StartIdx + NumVal || + NewVRegs.size() > StartIdx + NumVal) && + "NewVRegs too small to contain all the partial mapping"); + return NewVRegs.size() <= StartIdx + NumVal ? 
NewVRegs.end() + : &NewVRegs[StartIdx + NumVal]; +} + +void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) { + assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); + iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx = + getVRegsMem(OpIdx); + const SmallVectorImpl<PartialMapping> &PartMapList = + getInstrMapping().getOperandMapping(OpIdx).BreakDown; + SmallVectorImpl<PartialMapping>::const_iterator PartMap = PartMapList.begin(); + for (unsigned &NewVReg : NewVRegsForOpIdx) { + assert(PartMap != PartMapList.end() && "Out-of-bound access"); + assert(NewVReg == 0 && "Register has already been created"); + NewVReg = MRI.createGenericVirtualRegister(PartMap->Length); + MRI.setRegBank(NewVReg, *PartMap->RegBank); + ++PartMap; + } +} + +void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx, + unsigned PartialMapIdx, + unsigned NewVReg) { + assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); + assert(getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() > + PartialMapIdx && + "Out-of-bound access for partial mapping"); + // Make sure the memory is initialized for that operand. + (void)getVRegsMem(OpIdx); + assert(NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] == 0 && + "This value is already set"); + NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg; +} + +iterator_range<SmallVectorImpl<unsigned>::const_iterator> +RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx, + bool ForDebug) const { + (void)ForDebug; + assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access"); + int StartIdx = OpToNewVRegIdx[OpIdx]; + + if (StartIdx == OperandsMapper::DontKnowIdx) + return make_range(NewVRegs.end(), NewVRegs.end()); + + unsigned PartMapSize = + getInstrMapping().getOperandMapping(OpIdx).BreakDown.size(); + SmallVectorImpl<unsigned>::const_iterator End = + getNewVRegsEnd(StartIdx, PartMapSize); + iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res = + make_range(&NewVRegs[StartIdx], End); +#ifndef NDEBUG + for (unsigned VReg : Res) + assert((VReg || ForDebug) && "Some registers are uninitialized"); +#endif + return Res; +} + +void RegisterBankInfo::OperandsMapper::dump() const { + print(dbgs(), true); + dbgs() << '\n'; +} + +void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS, + bool ForDebug) const { + unsigned NumOpds = getMI().getNumOperands(); + if (ForDebug) { + OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n'; + // Print out the internal state of the index table. + OS << "Populated indices (CellNumber, IndexInNewVRegs): "; + bool IsFirst = true; + for (unsigned Idx = 0; Idx != NumOpds; ++Idx) { + if (OpToNewVRegIdx[Idx] != DontKnowIdx) { + if (!IsFirst) + OS << ", "; + OS << '(' << Idx << ", " << OpToNewVRegIdx[Idx] << ')'; + IsFirst = false; + } + } + OS << '\n'; + } else + OS << "Mapping ID: " << getInstrMapping().getID() << ' '; + + OS << "Operand Mapping: "; + // If we have a function, we can pretty print the name of the registers. + // Otherwise we will print the raw numbers. + const TargetRegisterInfo *TRI = + getMI().getParent() && getMI().getParent()->getParent() + ? 
getMI().getParent()->getParent()->getSubtarget().getRegisterInfo() + : nullptr; + bool IsFirst = true; + for (unsigned Idx = 0; Idx != NumOpds; ++Idx) { + if (OpToNewVRegIdx[Idx] == DontKnowIdx) + continue; + if (!IsFirst) + OS << ", "; + IsFirst = false; + OS << '(' << PrintReg(getMI().getOperand(Idx).getReg(), TRI) << ", ["; + bool IsFirstNewVReg = true; + for (unsigned VReg : getVRegs(Idx)) { + if (!IsFirstNewVReg) + OS << ", "; + IsFirstNewVReg = false; + OS << PrintReg(VReg, TRI); + } + OS << "])"; + } +} diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp index dd9a840..8c760b7 100644 --- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp @@ -59,7 +59,6 @@ // We use heuristics to discover the best global grouping we can (cf cl::opts). // ===---------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -92,6 +91,11 @@ EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass"), cl::init(true)); +static cl::opt<unsigned> +GlobalMergeMaxOffset("global-merge-max-offset", cl::Hidden, + cl::desc("Set maximum offset for global merge pass"), + cl::init(0)); + static cl::opt<bool> GlobalMergeGroupByUse( "global-merge-group-by-use", cl::Hidden, cl::desc("Improve global merge pass to look at uses"), cl::init(true)); @@ -131,6 +135,8 @@ namespace { /// Whether we should merge global variables that have external linkage. bool MergeExternalGlobals; + bool IsMachO; + bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; /// \brief Merge everything in \p Globals for which the corresponding bit @@ -158,10 +164,14 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - explicit GlobalMerge(const TargetMachine *TM = nullptr, - unsigned MaximalOffset = 0, - bool OnlyOptimizeForSize = false, - bool MergeExternalGlobals = false) + explicit GlobalMerge() + : FunctionPass(ID), TM(nullptr), MaxOffset(GlobalMergeMaxOffset), + OnlyOptimizeForSize(false), MergeExternalGlobals(false) { + initializeGlobalMergePass(*PassRegistry::getPassRegistry()); + } + + explicit GlobalMerge(const TargetMachine *TM, unsigned MaximalOffset, + bool OnlyOptimizeForSize, bool MergeExternalGlobals) : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset), OnlyOptimizeForSize(OnlyOptimizeForSize), MergeExternalGlobals(MergeExternalGlobals) { @@ -459,8 +469,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, // we can also emit an alias for internal linkage as it's safe to do so. // It's not safe on Mach-O as the alias (and thus the portion of the // MergedGlobals variable) may be dead stripped at link time. 
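Earlier in this hunk the patch adds the global-merge-max-offset flag; its effect is easiest to see as a packing bound, since merged globals share one base address and each member must remain reachable at base plus an immediate offset below MaxOffset. A simplified, self-contained sketch of such a grouping policy (illustrative only, not the pass's actual doMerge logic):

    #include <cstdint>
    #include <vector>

    struct Global { const char *Name; uint64_t Size; };

    // Greedily split Globals into mergeable runs whose total size stays
    // below MaxOffset; 0 disables merging in this sketch.
    std::vector<std::vector<Global>>
    groupForMerge(const std::vector<Global> &Globals, uint64_t MaxOffset) {
      std::vector<std::vector<Global>> Groups;
      if (MaxOffset == 0)
        return Groups;
      uint64_t Offset = 0;
      Groups.emplace_back();
      for (const Global &G : Globals) {
        if (G.Size >= MaxOffset)
          continue;                    // Too big to share a base register.
        if (Offset + G.Size > MaxOffset) {
          Groups.emplace_back();       // Start a new merged blob.
          Offset = 0;
        }
        Groups.back().push_back(G);
        Offset += G.Size;
      }
      if (Groups.back().empty())
        Groups.pop_back();
      return Groups;
    }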
- if (Linkage != GlobalValue::InternalLinkage || - !TM->getTargetTriple().isOSBinFormatMachO()) { + if (Linkage != GlobalValue::InternalLinkage || !IsMachO) { GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M); } @@ -513,6 +522,8 @@ bool GlobalMerge::doInitialization(Module &M) { if (!EnableGlobalMerge) return false; + IsMachO = Triple(M.getTargetTriple()).isOSBinFormatMachO(); + auto &DL = M.getDataLayout(); DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, BSSGlobals; @@ -550,7 +561,8 @@ bool GlobalMerge::doInitialization(Module &M) { continue; if (DL.getTypeAllocSize(Ty) < MaxOffset) { - if (TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal()) + if (TM && + TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal()) BSSGlobals[AddressSpace].push_back(&GV); else if (GV.isConstant()) ConstGlobals[AddressSpace].push_back(&GV); diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index c38c9d2..d225162 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements the machine instruction level if-conversion pass. +// This file implements the machine instruction level if-conversion pass, which +// tries to convert conditional branches into predicated instructions. // //===----------------------------------------------------------------------===// @@ -33,6 +34,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <utility> using namespace llvm; @@ -85,7 +87,7 @@ namespace { /// BBInfo - One per MachineBasicBlock, this is used to cache the result /// if-conversion feasibility analysis. This includes results from - /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its + /// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), and its /// classification, and common tail block of its successors (if it's a /// diamond shape), its size, whether it's predicable, and whether any /// instruction can clobber the 'would-be' predicate. @@ -94,7 +96,7 @@ namespace { /// IsBeingAnalyzed - True if BB is currently being analyzed. /// IsAnalyzed - True if BB has been analyzed (info is still valid). /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed. - /// IsBrAnalyzable - True if AnalyzeBranch() returns false. + /// IsBrAnalyzable - True if analyzeBranch() returns false. /// HasFallThrough - True if BB may fallthrough to the following BB. /// IsUnpredicable - True if BB is known to be unpredicable. /// ClobbersPred - True if BB could modify predicates (e.g. has @@ -103,7 +105,7 @@ namespace { /// ExtraCost - Extra cost for multi-cycle instructions. /// ExtraCost2 - Some instructions are slower when predicated /// BB - Corresponding MachineBasicBlock. - /// TrueBB / FalseBB- See AnalyzeBranch(). + /// TrueBB / FalseBB- See analyzeBranch(). /// BrCond - Conditions for end of block conditional branches. /// Predicate - Predicate used in the BB. 
struct BBInfo { @@ -161,7 +163,6 @@ namespace { const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - const MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; @@ -176,7 +177,7 @@ namespace { public: static char ID; IfConverter(std::function<bool(const Function &)> Ftor = nullptr) - : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(Ftor) { + : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) { initializeIfConverterPass(*PassRegistry::getPassRegistry()); } @@ -188,6 +189,11 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + private: bool ReverseBranchCondition(BBInfo &BBI); bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, @@ -198,10 +204,12 @@ namespace { bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; void ScanInstructions(BBInfo &BBI); - void AnalyzeBlock(MachineBasicBlock *MBB, std::vector<IfcvtToken*> &Tokens); + void AnalyzeBlock(MachineBasicBlock *MBB, + std::vector<std::unique_ptr<IfcvtToken>> &Tokens); bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, bool isTriangle = false, bool RevBranch = false); - void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens); + void AnalyzeBlocks(MachineFunction &MF, + std::vector<std::unique_ptr<IfcvtToken>> &Tokens); void InvalidatePreds(MachineBasicBlock *BB); void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); @@ -240,7 +248,8 @@ namespace { } // IfcvtTokenCmp - Used to sort if-conversion candidates. - static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) { + static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1, + const std::unique_ptr<IfcvtToken> &C2) { int Incr1 = (C1->Kind == ICDiamond) ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups; int Incr2 = (C2->Kind == ICDiamond) @@ -273,14 +282,15 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { - if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + if (skipFunction(*MF.getFunction()) || + (PredicateFtor && !PredicateFtor(*MF.getFunction()))) return false; const TargetSubtargetInfo &ST = MF.getSubtarget(); TLI = ST.getTargetLowering(); TII = ST.getInstrInfo(); TRI = ST.getRegisterInfo(); - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>()); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); MRI = &MF.getRegInfo(); SchedModel.init(ST.getSchedModel(), &ST, TII); @@ -292,7 +302,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool BFChange = false; if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. 
- BranchFolder BF(true, false, *MBFI, *MBPI); + BranchFolder BF(true, false, MBFI, *MBPI); BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); } @@ -309,7 +319,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); - std::vector<IfcvtToken*> Tokens; + std::vector<std::unique_ptr<IfcvtToken>> Tokens; MadeChange = false; unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds; @@ -319,15 +329,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool Change = false; AnalyzeBlocks(MF, Tokens); while (!Tokens.empty()) { - IfcvtToken *Token = Tokens.back(); + std::unique_ptr<IfcvtToken> Token = std::move(Tokens.back()); Tokens.pop_back(); BBInfo &BBI = Token->BBI; IfcvtKind Kind = Token->Kind; unsigned NumDups = Token->NumDups; unsigned NumDups2 = Token->NumDups2; - delete Token; - // If the block has been evicted out of the queue or it has already been // marked dead (due to it being predicated), then skip it. if (BBI.IsDone) @@ -414,18 +422,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { MadeChange |= Change; } - // Delete tokens in case of early exit. - while (!Tokens.empty()) { - IfcvtToken *Token = Tokens.back(); - Tokens.pop_back(); - delete Token; - } - Tokens.clear(); BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { - BranchFolder BF(false, false, *MBFI, *MBPI); + BranchFolder BF(false, false, MBFI, *MBPI); BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable<MachineModuleInfo>()); } @@ -586,7 +587,7 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, if (FIB == FIE) break; } - if (!TIB->isIdenticalTo(FIB)) + if (!TIB->isIdenticalTo(*FIB)) break; ++Dups1; ++TIB; @@ -595,15 +596,19 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, // Now, in preparation for counting duplicate instructions at the ends of the // blocks, move the end iterators up past any branch instructions. - while (TIE != TIB) { - --TIE; - if (!TIE->isBranch()) - break; - } - while (FIE != FIB) { - --FIE; - if (!FIE->isBranch()) - break; + // If both blocks are returning don't skip the branches, since they will + // likely be both identical return instructions. In such cases the return + // can be left unpredicated. + // Check for already containing all of the block. 
+ if (TIB == TIE || FIB == FIE) + return true; + --TIE; + --FIE; + if (!TrueBBI.BB->succ_empty() || !FalseBBI.BB->succ_empty()) { + while (TIE != TIB && TIE->isBranch()) + --TIE; + while (FIE != FIB && FIE->isBranch()) + --FIE; } // If Dups1 includes all of a block, then don't count duplicate @@ -626,7 +631,7 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, if (FIE == FIB) break; } - if (!TIE->isIdenticalTo(FIE)) + if (!TIE->isIdenticalTo(*FIE)) break; ++Dups2; --TIE; @@ -650,7 +655,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { BBI.TrueBB = BBI.FalseBB = nullptr; BBI.BrCond.clear(); BBI.IsBrAnalyzable = - !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr; if (BBI.BrCond.size()) { @@ -670,16 +675,45 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { BBI.ExtraCost = 0; BBI.ExtraCost2 = 0; BBI.ClobbersPred = false; - for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); - I != E; ++I) { - if (I->isDebugValue()) + for (auto &MI : *BBI.BB) { + if (MI.isDebugValue()) continue; - if (I->isNotDuplicable()) + // It's unsafe to duplicate convergent instructions in this context, so set + // BBI.CannotBeCopied to true if MI is convergent. To see why, consider the + // following CFG, which is subject to our "simple" transformation. + // + // BB0 // if (c1) goto BB1; else goto BB2; + // / \ + // BB1 | + // | BB2 // if (c2) goto TBB; else goto FBB; + // | / | + // | / | + // TBB | + // | | + // | FBB + // | + // exit + // + // Suppose we want to move TBB's contents up into BB1 and BB2 (in BB1 they'd + // be unconditional, and in BB2, they'd be predicated upon c2), and suppose + // TBB contains a convergent instruction. This is safe iff doing so does + // not add a control-flow dependency to the convergent instruction -- i.e., + // it's safe iff the set of control flows that leads us to the convergent + // instruction does not get smaller after the transformation. + // + // Originally we executed TBB if c1 || c2. After the transformation, there + // are two copies of TBB's instructions. We get to the first if c1, and we + // get to the second if !c1 && c2. + // + // There are clearly fewer ways to satisfy the condition "c1" than + // "c1 || c2". Since we've shrunk the set of control flows which lead to + // our convergent instruction, the transformation is unsafe. + if (MI.isNotDuplicable() || MI.isConvergent()) BBI.CannotBeCopied = true; - bool isPredicated = TII->isPredicated(I); - bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); + bool isPredicated = TII->isPredicated(MI); + bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch(); // A conditional branch is not predicable, but it may be eliminated. if (isCondBr) @@ -687,8 +721,8 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (!isPredicated) { BBI.NonPredSize++; - unsigned ExtraPredCost = TII->getPredicationCost(&*I); - unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); + unsigned ExtraPredCost = TII->getPredicationCost(MI); + unsigned NumCycles = SchedModel.computeInstrLatency(&MI, false); if (NumCycles > 1) BBI.ExtraCost += NumCycles-1; BBI.ExtraCost2 += ExtraPredCost; @@ -712,10 +746,10 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are // still potentially predicable. 
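The long convergence comment above boils down to a two-flag guard. As a toy model (plain structs, not the MachineInstr API):

    // Toy stand-in for the two MachineInstr queries used above.
    struct InstrModel {
      bool NotDuplicable;  // MI.isNotDuplicable()
      bool Convergent;     // MI.isConvergent()
    };

    // An instruction may be copied into both arms of the branch only if it
    // is duplicable and not convergent: duplication splits the condition
    // "c1 || c2" into the strictly smaller "c1" and "!c1 && c2", shrinking
    // the set of control flows reaching each copy, which convergent
    // semantics forbid.
    inline bool mayDuplicateForIfcvt(const InstrModel &MI) {
      return !MI.NotDuplicable && !MI.Convergent;
    }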
std::vector<MachineOperand> PredDefs; - if (TII->DefinesPredicate(I, PredDefs)) + if (TII->DefinesPredicate(MI, PredDefs)) BBI.ClobbersPred = true; - if (!TII->isPredicable(I)) { + if (!TII->isPredicable(MI)) { BBI.IsUnpredicable = true; return; } @@ -764,8 +798,8 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, /// AnalyzeBlock - Analyze the structure of the sub-CFG starting from /// the specified block. Record its successors and whether it looks like an /// if-conversion candidate. -void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, - std::vector<IfcvtToken*> &Tokens) { +void IfConverter::AnalyzeBlock( + MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { struct BBState { BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {} MachineBasicBlock *MBB; @@ -863,8 +897,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, // \ / // TailBB // Note TailBB can be empty. - Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups, - Dups2)); + Tokens.push_back(llvm::make_unique<IfcvtToken>( + BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2)); Enqueued = true; } @@ -879,7 +913,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, // | TBB // | / // FBB - Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups)); + Tokens.push_back( + llvm::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups)); Enqueued = true; } @@ -887,7 +922,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, TrueBBI.ExtraCost2, Prediction) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { - Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); + Tokens.push_back( + llvm::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } @@ -902,7 +938,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, // | TBB---> exit // | // FBB - Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups)); + Tokens.push_back( + llvm::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups)); Enqueued = true; } @@ -914,7 +951,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { - Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); + Tokens.push_back(llvm::make_unique<IfcvtToken>(BBI, ICTriangleFalse, + FNeedSub, Dups)); Enqueued = true; } @@ -924,7 +962,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { - Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); + Tokens.push_back( + llvm::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } @@ -933,7 +972,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, FalseBBI.ExtraCost2, Prediction.getCompl()) && FeasibilityAnalysis(FalseBBI, RevCond)) { - Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); + Tokens.push_back( + llvm::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; } } @@ -947,8 +987,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, /// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion /// candidates. 
-void IfConverter::AnalyzeBlocks(MachineFunction &MF, - std::vector<IfcvtToken*> &Tokens) { +void IfConverter::AnalyzeBlocks( + MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) { for (auto &BB : MF) AnalyzeBlock(&BB, Tokens); @@ -1001,15 +1041,15 @@ static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB, void IfConverter::RemoveExtraEdges(BBInfo &BBI) { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; - if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond)) + if (!TII->analyzeBranch(*BBI.BB, TBB, FBB, Cond)) BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all /// values defined in MI which are not live/used by MI. -static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) { +static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers; - Redefs.stepForward(*MI, Clobbers); + Redefs.stepForward(MI, Clobbers); // Now add the implicit uses for each of the clobbered values. for (auto Reg : Clobbers) { @@ -1046,7 +1086,7 @@ static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) { * Remove kill flags from operands with a registers in the @p DontKill set. */ static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) { - for (MIBundleOperands O(&MI); O.isValid(); ++O) { + for (MIBundleOperands O(MI); O.isValid(); ++O) { if (!O->isReg() || !O->isKill()) continue; if (DontKill.contains(O->getReg())) @@ -1097,13 +1137,13 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentiall redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(CvtBBI->BB); - Redefs.addLiveIns(NextBBI->BB); + Redefs.addLiveIns(*CvtBBI->BB); + Redefs.addLiveIns(*NextBBI->BB); // Compute a set of registers which must not be killed by instructions in // BB1: This is everything live-in to BB2. DontKill.init(TRI); - DontKill.addLiveIns(NextBBI->BB); + DontKill.addLiveIns(*NextBBI->BB); if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1202,8 +1242,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(CvtBBI->BB); - Redefs.addLiveIns(NextBBI->BB); + Redefs.addLiveIns(*CvtBBI->BB); + Redefs.addLiveIns(*NextBBI->BB); DontKill.clear(); @@ -1357,7 +1397,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. Redefs.init(TRI); - Redefs.addLiveIns(BBI1->BB); + Redefs.addLiveIns(*BBI1->BB); // Remove the duplicated instructions at the beginnings of both paths. // Skip dbg_value instructions @@ -1395,8 +1435,13 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); - // Remove branch from 'true' block and remove duplicated instructions. - BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); + // Remove branch from the 'true' block, unless it was not analyzable. + // Non-analyzable branches need to be preserved, since in such cases, + // the CFG structure is not an actual diamond (the join block may not + // be present). 
+ if (BBI1->IsBrAnalyzable) + BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); + // Remove duplicated instructions. DI1 = BBI1->BB->end(); for (unsigned i = 0; i != NumDups2; ) { // NumDups2 only counted non-dbg_value instructions, so this won't @@ -1413,8 +1458,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // must be removed. RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); - // Remove 'false' block branch and find the last instruction to predicate. - BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); + // Remove 'false' block branch (unless it was not analyzable), and find + // the last instruction to predicate. + if (BBI2->IsBrAnalyzable) + BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); while (NumDups2 != 0) { // NumDups2 only counted non-dbg_value instructions, so this won't @@ -1473,6 +1520,18 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Predicate the 'true' block. PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse); + // After predicating BBI1, if there is a predicated terminator in BBI1 and + // a non-predicated in BBI2, then we don't want to predicate the one from + // BBI2. The reason is that if we merged these blocks, we would end up with + // two predicated terminators in the same block. + if (!BBI2->BB->empty() && (DI2 == BBI2->BB->end())) { + MachineBasicBlock::iterator BBI1T = BBI1->BB->getFirstTerminator(); + MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator(); + if (BBI1T != BBI1->BB->end() && TII->isPredicated(*BBI1T) && + BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T)) + --DI2; + } + // Predicate the 'false' block. PredicateBlock(*BBI2, DI2, *Cond2); @@ -1488,6 +1547,12 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; bool CanMergeTail = !TailBBI.HasFallThrough && !TailBBI.BB->hasAddressTaken(); + // The if-converted block can still have a predicated terminator + // (e.g. a predicated return). If that is the case, we cannot merge + // it with the tail block. + MachineBasicBlock::const_iterator TI = BBI.BB->getFirstTerminator(); + if (TI != BBI.BB->end() && TII->isPredicated(*TI)) + CanMergeTail = false; // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; // check if there are any other predecessors besides those. 
unsigned NumPreds = TailBB->pred_size(); @@ -1523,14 +1588,14 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, return true; } -static bool MaySpeculate(const MachineInstr *MI, +static bool MaySpeculate(const MachineInstr &MI, SmallSet<unsigned, 4> &LaterRedefs) { bool SawStore = true; - if (!MI->isSafeToMove(nullptr, SawStore)) + if (!MI.isSafeToMove(nullptr, SawStore)) return false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -1551,8 +1616,8 @@ void IfConverter::PredicateBlock(BBInfo &BBI, SmallSet<unsigned, 4> *LaterRedefs) { bool AnyUnpred = false; bool MaySpec = LaterRedefs != nullptr; - for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { - if (I->isDebugValue() || TII->isPredicated(I)) + for (MachineInstr &I : llvm::make_range(BBI.BB->begin(), E)) { + if (I.isDebugValue() || TII->isPredicated(I)) continue; // It may be possible not to predicate an instruction if it's the 'true' // side of a diamond and the 'false' side may re-define the instruction's @@ -1566,7 +1631,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, MaySpec = false; if (!TII->PredicateInstruction(I, Cond)) { #ifndef NDEBUG - dbgs() << "Unable to predicate " << *I << "!\n"; + dbgs() << "Unable to predicate " << I << "!\n"; #endif llvm_unreachable(nullptr); } @@ -1593,25 +1658,24 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); - for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), - E = FromBBI.BB->end(); I != E; ++I) { + for (auto &I : *FromBBI.BB) { // Do not copy the end of the block branches. - if (IgnoreBr && I->isBranch()) + if (IgnoreBr && I.isBranch()) break; - MachineInstr *MI = MF.CloneMachineInstr(I); + MachineInstr *MI = MF.CloneMachineInstr(&I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - unsigned ExtraPredCost = TII->getPredicationCost(&*I); - unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); + unsigned ExtraPredCost = TII->getPredicationCost(I); + unsigned NumCycles = SchedModel.computeInstrLatency(&I, false); if (NumCycles > 1) ToBBI.ExtraCost += NumCycles-1; ToBBI.ExtraCost2 += ExtraPredCost; if (!TII->isPredicated(I) && !MI->isDebugValue()) { - if (!TII->PredicateInstruction(MI, Cond)) { + if (!TII->PredicateInstruction(*MI, Cond)) { #ifndef NDEBUG - dbgs() << "Unable to predicate " << *I << "!\n"; + dbgs() << "Unable to predicate " << I << "!\n"; #endif llvm_unreachable(nullptr); } @@ -1619,7 +1683,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(MI, Redefs); + UpdatePredRedefs(*MI, Redefs); // Some kill flags may not be correct anymore. if (!DontKill.empty()) @@ -1659,8 +1723,16 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { assert(!FromBBI.BB->hasAddressTaken() && "Removing a BB whose address is taken!"); - ToBBI.BB->splice(ToBBI.BB->end(), - FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); + // In case FromBBI.BB contains terminators (e.g. return instruction), + // first move the non-terminator instructions, then the terminators. 
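As a preview of the two splices performed next, here is the same maneuver on a plain std::list, a simplified stand-in for the basic block's instruction list with the predicated-terminator special case omitted:

    #include <list>

    typedef std::list<int> BlockModel;  // Negative values model terminators.

    inline BlockModel::iterator firstTerminator(BlockModel &B) {
      BlockModel::iterator I = B.begin();
      while (I != B.end() && *I >= 0)
        ++I;
      return I;
    }

    void mergeBlocksSketch(BlockModel &To, BlockModel &From) {
      BlockModel::iterator FromTI = firstTerminator(From);
      BlockModel::iterator ToTI = firstTerminator(To);
      // Non-terminators from From go in front of To's terminators...
      To.splice(ToTI, From, From.begin(), FromTI);
      // ...then From's terminators are appended at the very end.
      To.splice(To.end(), From, FromTI, From.end());
    }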
+ MachineBasicBlock::iterator FromTI = FromBBI.BB->getFirstTerminator();
+ MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator();
+ ToBBI.BB->splice(ToTI, FromBBI.BB, FromBBI.BB->begin(), FromTI);
+
+ // If FromBB has a non-predicated terminator we should copy it at the end.
+ if (FromTI != FromBBI.BB->end() && !TII->isPredicated(*FromTI))
+ ToTI = ToBBI.BB->end();
+ ToBBI.BB->splice(ToTI, FromBBI.BB, FromTI, FromBBI.BB->end());

 // Force normalizing the successors' probabilities of ToBBI.BB to convert all
 // unknown probabilities into known ones.
@@ -1768,5 +1840,5 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {

 FunctionPass *
 llvm::createIfConverter(std::function<bool(const Function &)> Ftor) {
- return new IfConverter(Ftor);
+ return new IfConverter(std::move(Ftor));
 }
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 39c1b9f..31d6bd0 100644
--- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -28,6 +28,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -46,10 +47,9 @@
 using namespace llvm;

-static cl::opt<unsigned> PageSize("imp-null-check-page-size",
- cl::desc("The page size of the target in "
- "bytes"),
- cl::init(4096));
+static cl::opt<int> PageSize("imp-null-check-page-size",
+ cl::desc("The page size of the target in bytes"),
+ cl::init(4096));

 #define DEBUG_TYPE "implicit-null-checks"

@@ -60,7 +60,7 @@ namespace {

 class ImplicitNullChecks : public MachineFunctionPass {
 /// Represents one null check that can be made implicit.
- struct NullCheck {
+ class NullCheck {
 // The memory operation the null check can be folded into.
 MachineInstr *MemOperation;
@@ -76,27 +76,42 @@ class ImplicitNullChecks : public MachineFunctionPass {
 // The block branched to if the pointer is null.
 MachineBasicBlock *NullSucc;

- NullCheck()
- : MemOperation(), CheckOperation(), CheckBlock(), NotNullSucc(),
- NullSucc() {}
+ // If this is non-null, then MemOperation has a dependency on this
+ // instruction, and it needs to be hoisted to execute before MemOperation.
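For reference while reading this hunk, the record being extended has roughly this shape (field names follow the patch; the plain struct and forward declarations are only illustrative). rewriteNullChecks() later moves OnlyDependency into the check block ahead of the faulting load, which is why at most one dependency is tolerated:

    class MachineInstr;        // Forward declarations keep the sketch
    class MachineBasicBlock;   // self-contained.

    struct NullCheckModel {
      MachineInstr *MemOperation;      // Load to become FAULTING_LOAD_OP.
      MachineInstr *CheckOperation;    // Explicit null test to be deleted.
      MachineBasicBlock *CheckBlock;   // Block ending in the cond. branch.
      MachineBasicBlock *NotNullSucc;  // Successor on the non-null path.
      MachineBasicBlock *NullSucc;     // Successor on the null path.
      MachineInstr *OnlyDependency;    // Lone instr hoisted first, or null.
    };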
+ MachineInstr *OnlyDependency;
+
 public:
 explicit NullCheck(MachineInstr *memOperation, MachineInstr *checkOperation,
 MachineBasicBlock *checkBlock,
 MachineBasicBlock *notNullSucc,
- MachineBasicBlock *nullSucc)
+ MachineBasicBlock *nullSucc,
+ MachineInstr *onlyDependency)
 : MemOperation(memOperation), CheckOperation(checkOperation),
- CheckBlock(checkBlock), NotNullSucc(notNullSucc), NullSucc(nullSucc) {
- }
+ CheckBlock(checkBlock), NotNullSucc(notNullSucc), NullSucc(nullSucc),
+ OnlyDependency(onlyDependency) {}
+
+ MachineInstr *getMemOperation() const { return MemOperation; }
+
+ MachineInstr *getCheckOperation() const { return CheckOperation; }
+
+ MachineBasicBlock *getCheckBlock() const { return CheckBlock; }
+
+ MachineBasicBlock *getNotNullSucc() const { return NotNullSucc; }
+
+ MachineBasicBlock *getNullSucc() const { return NullSucc; }
+
+ MachineInstr *getOnlyDependency() const { return OnlyDependency; }
 };

 const TargetInstrInfo *TII = nullptr;
 const TargetRegisterInfo *TRI = nullptr;
+ AliasAnalysis *AA = nullptr;
 MachineModuleInfo *MMI = nullptr;

 bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
 SmallVectorImpl<NullCheck> &NullCheckList);
 MachineInstr *insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB,
- MCSymbol *HandlerLabel);
+ MachineBasicBlock *HandlerMBB);
 void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);

 public:
@@ -107,6 +122,15 @@ public:
 }

 bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
 };

 /// \brief Detect re-ordering hazards and dependencies.
@@ -115,14 +139,22 @@ public:
 /// machine instruction can be re-ordered from after the machine instructions
 /// seen so far to before them.
 class HazardDetector {
- DenseSet<unsigned> RegDefs;
+ static MachineInstr *getUnknownMI() {
+ return DenseMapInfo<MachineInstr *>::getTombstoneKey();
+ }
+
+ // Maps physical registers to the instruction defining them. If there has
+ // been more than one def of a specific register, that register is mapped to
+ // getUnknownMI().
+ DenseMap<unsigned, MachineInstr *> RegDefs;
 DenseSet<unsigned> RegUses;
 const TargetRegisterInfo &TRI;
 bool hasSeenClobber;
+ AliasAnalysis &AA;

 public:
- explicit HazardDetector(const TargetRegisterInfo &TRI) :
- TRI(TRI), hasSeenClobber(false) {}
+ explicit HazardDetector(const TargetRegisterInfo &TRI, AliasAnalysis &AA)
+ : TRI(TRI), hasSeenClobber(false), AA(AA) {}

 /// \brief Make a note of \p MI for later queries to isSafeToHoist.
 ///
@@ -130,8 +162,10 @@ public:
 void rememberInstruction(MachineInstr *MI);

 /// \brief Return true if it is safe to hoist \p MI from after all the
- /// instructions seen so far (via rememberInstruction) to before it.
- bool isSafeToHoist(MachineInstr *MI);
+ /// instructions seen so far (via rememberInstruction) to before it. If \p MI
+ /// has one and only one transitive dependency, set \p Dependency to that
+ /// instruction. If there are more dependencies, return false.
+ bool isSafeToHoist(MachineInstr *MI, MachineInstr *&Dependency);

 /// \brief Return true if this instance of HazardDetector has been clobbered
 /// (i.e. has no more useful information).
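The RegDefs change above trades a set for a map with a "more than one def" sentinel. The same bookkeeping in a self-contained form, with std::unordered_map standing in for DenseMap and an arbitrary invalid address standing in for the tombstone key:

    #include <cstdint>
    #include <unordered_map>

    struct InstrStub;  // Stand-in for MachineInstr.

    inline InstrStub *unknownInstr() {
      // Any address that can never name a real instruction works here.
      return reinterpret_cast<InstrStub *>(~std::uintptr_t(0));
    }

    struct DefTracker {
      std::unordered_map<unsigned, InstrStub *> RegDefs;

      void recordDef(unsigned Reg, InstrStub *MI) {
        auto It = RegDefs.find(Reg);
        if (It == RegDefs.end())
          RegDefs.emplace(Reg, MI);     // First def: remember it precisely.
        else
          It->second = unknownInstr();  // Later defs: precision is lost.
      }

      // The unique defining instruction, or nullptr if unknown or multiple.
      InstrStub *uniqueDef(unsigned Reg) const {
        auto It = RegDefs.find(Reg);
        if (It == RegDefs.end() || It->second == unknownInstr())
          return nullptr;
        return It->second;
      }
    };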
@@ -170,15 +204,23 @@ void HazardDetector::rememberInstruction(MachineInstr *MI) { if (!MO.isReg() || !MO.getReg()) continue; - if (MO.isDef()) - RegDefs.insert(MO.getReg()); - else + if (MO.isDef()) { + auto It = RegDefs.find(MO.getReg()); + if (It == RegDefs.end()) + RegDefs.insert({MO.getReg(), MI}); + else { + assert(It->second && "Found null MI?"); + It->second = getUnknownMI(); + } + } else RegUses.insert(MO.getReg()); } } -bool HazardDetector::isSafeToHoist(MachineInstr *MI) { +bool HazardDetector::isSafeToHoist(MachineInstr *MI, + MachineInstr *&Dependency) { assert(!isClobbered() && "isSafeToHoist cannot do anything useful!"); + Dependency = nullptr; // Right now we don't want to worry about LLVM's memory model. This can be // made more precise later. @@ -188,9 +230,54 @@ bool HazardDetector::isSafeToHoist(MachineInstr *MI) { for (auto &MO : MI->operands()) { if (MO.isReg() && MO.getReg()) { - for (unsigned Reg : RegDefs) - if (TRI.regsOverlap(Reg, MO.getReg())) - return false; // We found a write-after-write or read-after-write + for (auto &RegDef : RegDefs) { + unsigned Reg = RegDef.first; + MachineInstr *MI = RegDef.second; + if (!TRI.regsOverlap(Reg, MO.getReg())) + continue; + + // We found a write-after-write or read-after-write, see if the + // instruction causing this dependency can be hoisted too. + + if (MI == getUnknownMI()) + // We don't have precise dependency information. + return false; + + if (Dependency) { + if (Dependency == MI) + continue; + // We already have one dependency, and we can track only one. + return false; + } + + // Now check if MI is actually a dependency that can be hoisted. + + // We don't want to track transitive dependencies. We already know that + // MI is the only instruction that defines Reg, but we need to be sure + // that it does not use any registers that have been defined (trivially + // checked below by ensuring that there are no register uses), and that + // it is the only def for every register it defines (otherwise we could + // violate a write after write hazard). + auto IsMIOperandSafe = [&](MachineOperand &MO) { + if (!MO.isReg() || !MO.getReg()) + return true; + if (MO.isUse()) + return false; + assert((!MO.isDef() || RegDefs.count(MO.getReg())) && + "All defs must be tracked in RegDefs by now!"); + return !MO.isDef() || RegDefs.find(MO.getReg())->second == MI; + }; + + if (!all_of(MI->operands(), IsMIOperandSafe)) + return false; + + // Now check for speculation safety: + bool SawStore = true; + if (!MI->isSafeToMove(&AA, SawStore) || MI->mayLoad()) + return false; + + Dependency = MI; + } if (MO.isDef()) for (unsigned Reg : RegUses) @@ -206,6 +293,7 @@ bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getRegInfo().getTargetRegisterInfo(); MMI = &MF.getMMI(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); SmallVector<NullCheck, 16> NullCheckList; @@ -218,6 +306,16 @@ bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) { return !NullCheckList.empty(); } +// Return true if any register aliasing \p Reg is live-in into \p MBB. +static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI, + MachineBasicBlock *MBB, unsigned Reg) { + for (MCRegAliasIterator AR(Reg, TRI, /*IncludeSelf*/ true); AR.isValid(); + ++AR) + if (MBB->isLiveIn(*AR)) + return true; + return false; +} + /// Analyze MBB to check if its terminating branch can be turned into an /// implicit null check. 
/// If yes, append a description of the said null check to
/// NullCheckList and return true, else return false.
@@ -234,7 +332,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(

 MachineBranchPredicate MBP;

- if (TII->AnalyzeBranchPredicate(MBB, MBP, true))
+ if (TII->analyzeBranchPredicate(MBB, MBP, true))
 return false;

 // Is the predicate comparing an integer to zero?
@@ -319,22 +417,59 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(

 unsigned PointerReg = MBP.LHS.getReg();

- HazardDetector HD(*TRI);
+ HazardDetector HD(*TRI, *AA);

 for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE;
 ++MII) {
- MachineInstr *MI = &*MII;
- unsigned BaseReg, Offset;
+ MachineInstr &MI = *MII;
+ unsigned BaseReg;
+ int64_t Offset;
+ MachineInstr *Dependency = nullptr;
 if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
- if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg &&
- Offset < PageSize && MI->getDesc().getNumDefs() <= 1 &&
- HD.isSafeToHoist(MI)) {
- NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc,
- NullSucc);
- return true;
+ if (MI.mayLoad() && !MI.isPredicable() && BaseReg == PointerReg &&
+ Offset < PageSize && MI.getDesc().getNumDefs() <= 1 &&
+ HD.isSafeToHoist(&MI, Dependency)) {
+
+ auto DependencyOperandIsOk = [&](MachineOperand &MO) {
+ assert(!(MO.isReg() && MO.isUse()) &&
+ "No transitive dependencies please!");
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ return true;
+
+ // Make sure that we won't clobber any live ins to the sibling block
+ // by hoisting Dependency. For instance, we can't hoist INST to
+ // before the null check (even if it is safe, and does not violate any
+ // dependencies in the non_null_block) if %rdx is live in to
+ // _null_block.
+ //
+ // test %rcx, %rcx
+ // je _null_block
+ // _non_null_block:
+ // %rdx<def> = INST
+ // ...
+ if (AnyAliasLiveIn(TRI, NullSucc, MO.getReg()))
+ return false;
+
+ // Make sure Dependency isn't re-defining the base register. Then we
+ // won't get the memory operation on the address we want.
+ if (TRI->regsOverlap(MO.getReg(), BaseReg))
+ return false;
+
+ return true;
+ };
+
+ bool DependencyOperandsAreOk =
+ !Dependency ||
+ all_of(Dependency->operands(), DependencyOperandIsOk);
+
+ if (DependencyOperandsAreOk) {
+ NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
+ NullSucc, Dependency);
+ return true;
+ }
 }

- HD.rememberInstruction(MI);
+ HD.rememberInstruction(&MI);
 if (HD.isClobbered())
 return false;
 }
@@ -344,11 +479,12 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(

/// Wrap a machine load instruction, LoadMI, into a FAULTING_LOAD_OP machine
/// instruction. The FAULTING_LOAD_OP instruction does the same load as LoadMI
-/// (defining the same register), and branches to HandlerLabel if the load
+/// (defining the same register), and branches to HandlerMBB if the load
/// faults. The FAULTING_LOAD_OP instruction is inserted at the end of MBB.
-MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
- MachineBasicBlock *MBB,
- MCSymbol *HandlerLabel) {
+MachineInstr *
+ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *HandlerMBB) {
 const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
 // all targets.
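The lambda above folds three legality questions into one scan of the dependency's operands. Distilled into a free function over toy types (the callbacks stand in for the TRI queries; all names are hypothetical):

    struct OperandModel {
      bool IsRegDef;  // Only register defs matter for these checks.
      unsigned Reg;
    };

    bool dependencyOperandIsOk(const OperandModel &MO, unsigned BaseReg,
                               bool (*aliasLiveIntoNullSucc)(unsigned),
                               bool (*regsOverlap)(unsigned, unsigned)) {
      if (!MO.IsRegDef)
        return true;
      if (aliasLiveIntoNullSucc(MO.Reg))
        return false;  // Would clobber state the null path still needs.
      if (regsOverlap(MO.Reg, BaseReg))
        return false;  // Would redefine the address being null-checked.
      return true;
    }

Speculation safety of the dependency itself is checked separately via isSafeToMove, in the isSafeToHoist hunk earlier in this diff.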
@@ -364,7 +500,7 @@ MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI, } auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg) - .addSym(HandlerLabel) + .addMBB(HandlerMBB) .addImm(LoadMI->getOpcode()); for (auto &MO : LoadMI->uses()) @@ -381,28 +517,51 @@ void ImplicitNullChecks::rewriteNullChecks( DebugLoc DL; for (auto &NC : NullCheckList) { - MCSymbol *HandlerLabel = MMI->getContext().createTempSymbol(); - // Remove the conditional branch dependent on the null check. - unsigned BranchesRemoved = TII->RemoveBranch(*NC.CheckBlock); + unsigned BranchesRemoved = TII->RemoveBranch(*NC.getCheckBlock()); (void)BranchesRemoved; assert(BranchesRemoved > 0 && "expected at least one branch!"); + if (auto *DepMI = NC.getOnlyDependency()) { + DepMI->removeFromParent(); + NC.getCheckBlock()->insert(NC.getCheckBlock()->end(), DepMI); + } + // Insert a faulting load where the conditional branch was originally. We // check earlier ensures that this bit of code motion is legal. We do not // touch the successors list for any basic block since we haven't changed // control flow, we've just made it implicit. - insertFaultingLoad(NC.MemOperation, NC.CheckBlock, HandlerLabel); - NC.MemOperation->eraseFromParent(); - NC.CheckOperation->eraseFromParent(); + MachineInstr *FaultingLoad = insertFaultingLoad( + NC.getMemOperation(), NC.getCheckBlock(), NC.getNullSucc()); + // Now the values defined by MemOperation, if any, are live-in of + // the block of MemOperation. + // The original load operation may define implicit-defs alongside + // the loaded value. + MachineBasicBlock *MBB = NC.getMemOperation()->getParent(); + for (const MachineOperand &MO : FaultingLoad->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || MBB->isLiveIn(Reg)) + continue; + MBB->addLiveIn(Reg); + } - // Insert an *unconditional* branch to not-null successor. - TII->InsertBranch(*NC.CheckBlock, NC.NotNullSucc, nullptr, /*Cond=*/None, - DL); + if (auto *DepMI = NC.getOnlyDependency()) { + for (auto &MO : DepMI->operands()) { + if (!MO.isReg() || !MO.getReg() || !MO.isDef()) + continue; + if (!NC.getNotNullSucc()->isLiveIn(MO.getReg())) + NC.getNotNullSucc()->addLiveIn(MO.getReg()); + } + } + + NC.getMemOperation()->eraseFromParent(); + NC.getCheckOperation()->eraseFromParent(); - // Emit the HandlerLabel as an EH_LABEL. - BuildMI(*NC.NullSucc, NC.NullSucc->begin(), DL, - TII->get(TargetOpcode::EH_LABEL)).addSym(HandlerLabel); + // Insert an *unconditional* branch to not-null successor. 
+ TII->InsertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
+ /*Cond=*/None, DL);

 NumImplicitNullChecks++;
 }
@@ -412,5 +571,6 @@ char ImplicitNullChecks::ID = 0;
char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks",
 "Implicit null checks", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks",
 "Implicit null checks", false, false)
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index e310132..197db77 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -13,6 +13,8 @@
 //===----------------------------------------------------------------------===//

 #include "Spiller.h"
+#include "SplitKit.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/TinyPtrVector.h"
@@ -30,6 +32,7 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -48,13 +51,82 @@ STATISTIC(NumReloadsRemoved, "Number of reloads removed");
 STATISTIC(NumFolded, "Number of folded stack accesses");
 STATISTIC(NumFoldedLoads, "Number of folded loads");
 STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
-STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads");
-STATISTIC(NumHoists, "Number of hoisted spills");

 static cl::opt<bool>
 DisableHoisting("disable-spill-hoist", cl::Hidden,
 cl::desc("Disable inline spill hoisting"));

 namespace {
+class HoistSpillHelper : private LiveRangeEdit::Delegate {
+ MachineFunction &MF;
+ LiveIntervals &LIS;
+ LiveStacks &LSS;
+ AliasAnalysis *AA;
+ MachineDominatorTree &MDT;
+ MachineLoopInfo &Loops;
+ VirtRegMap &VRM;
+ MachineFrameInfo &MFI;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineBlockFrequencyInfo &MBFI;
+
+ InsertPointAnalysis IPA;
+
+ // Map from StackSlot to its original register.
+ DenseMap<int, unsigned> StackSlotToReg;
+ // Map from pair of (StackSlot and Original VNI) to a set of spills which
+ // have the same stackslot and have equal values defined by Original VNI.
+ // These spills are mergeable and are hoist candidates.
+ typedef MapVector<std::pair<int, VNInfo *>, SmallPtrSet<MachineInstr *, 16>>
+ MergeableSpillsMap;
+ MergeableSpillsMap MergeableSpills;
+
+ /// This is the map from original register to a set containing all its
+ /// siblings. To hoist a spill to another BB, we need to find a live
+ /// sibling there and use it as the source of the new spill.
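A compact model of the MergeableSpills keying declared above: one bucket per (stack slot, original value number) pair, so only spills that write the same value to the same slot are ever candidates for merging (std::map and std::set stand in for MapVector and SmallPtrSet):

    #include <map>
    #include <set>
    #include <utility>

    struct Spill;        // Stand-in for MachineInstr.
    typedef int ValueId; // Stand-in for VNInfo: identity of the value stored.

    typedef std::map<std::pair<int, ValueId>, std::set<Spill *>>
        MergeableSpillsModel;

    inline void addMergeableSpill(MergeableSpillsModel &M, int StackSlot,
                                  ValueId OrigVNI, Spill *S) {
      M[{StackSlot, OrigVNI}].insert(S);
    }

The Virt2SiblingsMap member declared next supplies the live sibling registers that a hoisted spill in another block can use as its source.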
+ DenseMap<unsigned, SmallSetVector<unsigned, 16>> Virt2SiblingsMap; + + bool isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI, MachineBasicBlock &BB, + unsigned &LiveReg); + + void rmRedundantSpills( + SmallPtrSet<MachineInstr *, 16> &Spills, + SmallVectorImpl<MachineInstr *> &SpillsToRm, + DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill); + + void getVisitOrders( + MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills, + SmallVectorImpl<MachineDomTreeNode *> &Orders, + SmallVectorImpl<MachineInstr *> &SpillsToRm, + DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep, + DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill); + + void runHoistSpills(unsigned OrigReg, VNInfo &OrigVNI, + SmallPtrSet<MachineInstr *, 16> &Spills, + SmallVectorImpl<MachineInstr *> &SpillsToRm, + DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns); + +public: + HoistSpillHelper(MachineFunctionPass &pass, MachineFunction &mf, + VirtRegMap &vrm) + : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()), + LSS(pass.getAnalysis<LiveStacks>()), + AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), + MDT(pass.getAnalysis<MachineDominatorTree>()), + Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), + MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), + TII(*mf.getSubtarget().getInstrInfo()), + TRI(*mf.getSubtarget().getRegisterInfo()), + MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), + IPA(LIS, mf.getNumBlockIDs()) {} + + void addToMergeableSpills(MachineInstr &Spill, int StackSlot, + unsigned Original); + bool rmFromMergeableSpills(MachineInstr &Spill, int StackSlot); + void hoistAllSpills(); + void LRE_DidCloneVirtReg(unsigned, unsigned) override; +}; + class InlineSpiller : public Spiller { MachineFunction &MF; LiveIntervals &LIS; @@ -85,56 +157,12 @@ class InlineSpiller : public Spiller { // Values that failed to remat at some point. SmallPtrSet<VNInfo*, 8> UsedValues; -public: - // Information about a value that was defined by a copy from a sibling - // register. - struct SibValueInfo { - // True when all reaching defs were reloads: No spill is necessary. - bool AllDefsAreReloads; - - // True when value is defined by an original PHI not from splitting. - bool DefByOrigPHI; - - // True when the COPY defining this value killed its source. - bool KillsSource; - - // The preferred register to spill. - unsigned SpillReg; - - // The value of SpillReg that should be spilled. - VNInfo *SpillVNI; - - // The block where SpillVNI should be spilled. Currently, this must be the - // block containing SpillVNI->def. - MachineBasicBlock *SpillMBB; - - // A defining instruction that is not a sibling copy or a reload, or NULL. - // This can be used as a template for rematerialization. - MachineInstr *DefMI; - - // List of values that depend on this one. These values are actually the - // same, but live range splitting has placed them in different registers, - // or SSA update needed to insert PHI-defs to preserve SSA form. This is - // copies of the current value and phi-kills. Usually only phi-kills cause - // more than one dependent value. - TinyPtrVector<VNInfo*> Deps; - - SibValueInfo(unsigned Reg, VNInfo *VNI) - : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false), - SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {} - - // Returns true when a def has been found. - bool hasDef() const { return DefByOrigPHI || DefMI; } - }; - -private: - // Values in RegsToSpill defined by sibling copies. 
- typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap; - SibValueMap SibValues; - // Dead defs generated during spilling. SmallVector<MachineInstr*, 8> DeadDefs; + // Object records spills information and does the hoisting. + HoistSpillHelper HSpiller; + ~InlineSpiller() override {} public: @@ -147,9 +175,11 @@ public: MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), - MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {} + MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()), + HSpiller(pass, mf, vrm) {} void spill(LiveRangeEdit &) override; + void postOptimization() override; private: bool isSnippet(const LiveInterval &SnipLI); @@ -161,15 +191,11 @@ private: } bool isSibling(unsigned Reg); - MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*); - void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr); - void analyzeSiblingValues(); - - bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI); + bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI); void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); void markValueUsed(LiveInterval*, VNInfo*); - bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI); + bool reMaterializeFor(LiveInterval &, MachineInstr &MI); void reMaterializeAll(); bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); @@ -210,13 +236,13 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass, /// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, /// otherwise return 0. -static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) { - if (!MI->isFullCopy()) +static unsigned isFullCopyOf(const MachineInstr &MI, unsigned Reg) { + if (!MI.isFullCopy()) return 0; - if (MI->getOperand(0).getReg() == Reg) - return MI->getOperand(1).getReg(); - if (MI->getOperand(1).getReg() == Reg) - return MI->getOperand(0).getReg(); + if (MI.getOperand(0).getReg() == Reg) + return MI.getOperand(1).getReg(); + if (MI.getOperand(1).getReg() == Reg) + return MI.getOperand(0).getReg(); return 0; } @@ -242,7 +268,7 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { for (MachineRegisterInfo::reg_instr_nodbg_iterator RI = MRI.reg_instr_nodbg_begin(SnipLI.reg), E = MRI.reg_instr_nodbg_end(); RI != E; ) { - MachineInstr *MI = &*(RI++); + MachineInstr &MI = *RI++; // Allow copies to/from Reg. if (isFullCopyOf(MI, Reg)) @@ -258,9 +284,9 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { continue; // Allow a single additional instruction. 
- if (UseMI && MI != UseMI) + if (UseMI && &MI != UseMI) return false; - UseMI = MI; + UseMI = &MI; } return true; } @@ -281,14 +307,14 @@ void InlineSpiller::collectRegsToSpill() { for (MachineRegisterInfo::reg_instr_iterator RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { - MachineInstr *MI = &*(RI++); + MachineInstr &MI = *RI++; unsigned SnipReg = isFullCopyOf(MI, Reg); if (!isSibling(SnipReg)) continue; LiveInterval &SnipLI = LIS.getInterval(SnipReg); if (!isSnippet(SnipLI)) continue; - SnippetCopies.insert(MI); + SnippetCopies.insert(&MI); if (isRegToSpill(SnipReg)) continue; RegsToSpill.push_back(SnipReg); @@ -297,418 +323,46 @@ void InlineSpiller::collectRegsToSpill() { } } - -//===----------------------------------------------------------------------===// -// Sibling Values -//===----------------------------------------------------------------------===// - -// After live range splitting, some values to be spilled may be defined by -// copies from sibling registers. We trace the sibling copies back to the -// original value if it still exists. We need it for rematerialization. -// -// Even when the value can't be rematerialized, we still want to determine if -// the value has already been spilled, or we may want to hoist the spill from a -// loop. - bool InlineSpiller::isSibling(unsigned Reg) { return TargetRegisterInfo::isVirtualRegister(Reg) && VRM.getOriginal(Reg) == Original; } -#ifndef NDEBUG -static raw_ostream &operator<<(raw_ostream &OS, - const InlineSpiller::SibValueInfo &SVI) { - OS << "spill " << PrintReg(SVI.SpillReg) << ':' - << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def; - if (SVI.SpillMBB) - OS << " in BB#" << SVI.SpillMBB->getNumber(); - if (SVI.AllDefsAreReloads) - OS << " all-reloads"; - if (SVI.DefByOrigPHI) - OS << " orig-phi"; - if (SVI.KillsSource) - OS << " kill"; - OS << " deps["; - for (VNInfo *Dep : SVI.Deps) - OS << ' ' << Dep->id << '@' << Dep->def; - OS << " ]"; - if (SVI.DefMI) - OS << " def: " << *SVI.DefMI; - else - OS << '\n'; - return OS; -} -#endif - -/// propagateSiblingValue - Propagate the value in SVI to dependents if it is -/// known. Otherwise remember the dependency for later. +/// It is beneficial to spill to earlier place in the same BB in case +/// as follows: +/// There is an alternative def earlier in the same MBB. +/// Hoist the spill as far as possible in SpillMBB. This can ease +/// register pressure: /// -/// @param SVIIter SibValues entry to propagate. -/// @param VNI Dependent value, or NULL to propagate to all saved dependents. -void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, - VNInfo *VNI) { - SibValueMap::value_type *SVI = &*SVIIter; - - // When VNI is non-NULL, add it to SVI's deps, and only propagate to that. - TinyPtrVector<VNInfo*> FirstDeps; - if (VNI) { - FirstDeps.push_back(VNI); - SVI->second.Deps.push_back(VNI); - } - - // Has the value been completely determined yet? If not, defer propagation. - if (!SVI->second.hasDef()) - return; - - // Work list of values to propagate. - SmallSetVector<SibValueMap::value_type *, 8> WorkList; - WorkList.insert(SVI); - - do { - SVI = WorkList.pop_back_val(); - TinyPtrVector<VNInfo*> *Deps = VNI ? 
&FirstDeps : &SVI->second.Deps; - VNI = nullptr; - - SibValueInfo &SV = SVI->second; - if (!SV.SpillMBB) - SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def); - - DEBUG(dbgs() << " prop to " << Deps->size() << ": " - << SVI->first->id << '@' << SVI->first->def << ":\t" << SV); - - assert(SV.hasDef() && "Propagating undefined value"); - - // Should this value be propagated as a preferred spill candidate? We don't - // propagate values of registers that are about to spill. - bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg); - unsigned SpillDepth = ~0u; - - for (VNInfo *Dep : *Deps) { - SibValueMap::iterator DepSVI = SibValues.find(Dep); - assert(DepSVI != SibValues.end() && "Dependent value not in SibValues"); - SibValueInfo &DepSV = DepSVI->second; - if (!DepSV.SpillMBB) - DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def); - - bool Changed = false; - - // Propagate defining instruction. - if (!DepSV.hasDef()) { - Changed = true; - DepSV.DefMI = SV.DefMI; - DepSV.DefByOrigPHI = SV.DefByOrigPHI; - } - - // Propagate AllDefsAreReloads. For PHI values, this computes an AND of - // all predecessors. - if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) { - Changed = true; - DepSV.AllDefsAreReloads = false; - } - - // Propagate best spill value. - if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) { - if (SV.SpillMBB == DepSV.SpillMBB) { - // DepSV is in the same block. Hoist when dominated. - if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) { - // This is an alternative def earlier in the same MBB. - // Hoist the spill as far as possible in SpillMBB. This can ease - // register pressure: - // - // x = def - // y = use x - // s = copy x - // - // Hoisting the spill of s to immediately after the def removes the - // interference between x and y: - // - // x = def - // spill x - // y = use x<kill> - // - // This hoist only helps when the DepSV copy kills its source. - Changed = true; - DepSV.SpillReg = SV.SpillReg; - DepSV.SpillVNI = SV.SpillVNI; - DepSV.SpillMBB = SV.SpillMBB; - } - } else { - // DepSV is in a different block. - if (SpillDepth == ~0u) - SpillDepth = Loops.getLoopDepth(SV.SpillMBB); - - // Also hoist spills to blocks with smaller loop depth, but make sure - // that the new value dominates. Non-phi dependents are always - // dominated, phis need checking. - - const BranchProbability MarginProb(4, 5); // 80% - // Hoist a spill to outer loop if there are multiple dependents (it - // can be beneficial if more than one dependents are hoisted) or - // if DepSV (the hoisting source) is hotter than SV (the hoisting - // destination) (we add a 80% margin to bias a little towards - // loop depth). - bool HoistCondition = - (MBFI.getBlockFreq(DepSV.SpillMBB) >= - (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) || - Deps->size() > 1; - - if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) && - HoistCondition && - (!DepSVI->first->isPHIDef() || - MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) { - Changed = true; - DepSV.SpillReg = SV.SpillReg; - DepSV.SpillVNI = SV.SpillVNI; - DepSV.SpillMBB = SV.SpillMBB; - } - } - } - - if (!Changed) - continue; - - // Something changed in DepSVI. Propagate to dependents. - WorkList.insert(&*DepSVI); - - DEBUG(dbgs() << " update " << DepSVI->first->id << '@' - << DepSVI->first->def << " to:\t" << DepSV); - } - } while (!WorkList.empty()); -} - -/// traceSiblingValue - Trace a value that is about to be spilled back to the -/// real defining instructions by looking through sibling copies. 
Always stay -/// within the range of OrigVNI so the registers are known to carry the same -/// value. +/// x = def +/// y = use x +/// s = copy x /// -/// Determine if the value is defined by all reloads, so spilling isn't -/// necessary - the value is already in the stack slot. +/// Hoisting the spill of s to immediately after the def removes the +/// interference between x and y: /// -/// Return a defining instruction that may be a candidate for rematerialization. +/// x = def +/// spill x +/// y = use x<kill> /// -MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, - VNInfo *OrigVNI) { - // Check if a cached value already exists. - SibValueMap::iterator SVI; - bool Inserted; - std::tie(SVI, Inserted) = - SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI))); - if (!Inserted) { - DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':' - << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second); - return SVI->second.DefMI; - } - - DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':' - << UseVNI->id << '@' << UseVNI->def << '\n'); - - // List of (Reg, VNI) that have been inserted into SibValues, but need to be - // processed. - SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList; - WorkList.push_back(std::make_pair(UseReg, UseVNI)); - - LiveInterval &OrigLI = LIS.getInterval(Original); - do { - unsigned Reg; - VNInfo *VNI; - std::tie(Reg, VNI) = WorkList.pop_back_val(); - DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def - << ":\t"); - - // First check if this value has already been computed. - SVI = SibValues.find(VNI); - assert(SVI != SibValues.end() && "Missing SibValues entry"); - - // Trace through PHI-defs created by live range splitting. - if (VNI->isPHIDef()) { - // Stop at original PHIs. We don't know the value at the - // predecessors. Look up the VNInfo for the current definition - // in OrigLI, to properly determine whether or not this phi was - // added by splitting. - if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) { - DEBUG(dbgs() << "orig phi value\n"); - SVI->second.DefByOrigPHI = true; - SVI->second.AllDefsAreReloads = false; - propagateSiblingValue(SVI); - continue; - } - - // This is a PHI inserted by live range splitting. We could trace the - // live-out value from predecessor blocks, but that search can be very - // expensive if there are many predecessors and many more PHIs as - // generated by tail-dup when it sees an indirectbr. Instead, look at - // all the non-PHI defs that have the same value as OrigVNI. They must - // jointly dominate VNI->def. This is not optimal since VNI may actually - // be jointly dominated by a smaller subset of defs, so there is a change - // we will miss a AllDefsAreReloads optimization. - - // Separate all values dominated by OrigVNI into PHIs and non-PHIs. - SmallVector<VNInfo*, 8> PHIs, NonPHIs; - LiveInterval &LI = LIS.getInterval(Reg); - - for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end(); - VI != VE; ++VI) { - VNInfo *VNI2 = *VI; - if (VNI2->isUnused()) - continue; - if (!OrigLI.containsOneValue() && - OrigLI.getVNInfoAt(VNI2->def) != OrigVNI) - continue; - if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def) - PHIs.push_back(VNI2); - else - NonPHIs.push_back(VNI2); - } - DEBUG(dbgs() << "split phi value, checking " << PHIs.size() - << " phi-defs, and " << NonPHIs.size() - << " non-phi/orig defs\n"); - - // Create entries for all the PHIs. Don't add them to the worklist, we - // are processing all of them in one go here. 
- for (VNInfo *PHI : PHIs) - SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI))); - - // Add every PHI as a dependent of all the non-PHIs. - for (VNInfo *NonPHI : NonPHIs) { - // Known value? Try an insertion. - std::tie(SVI, Inserted) = - SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); - // Add all the PHIs as dependents of NonPHI. - SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(), - PHIs.end()); - // This is the first time we see NonPHI, add it to the worklist. - if (Inserted) - WorkList.push_back(std::make_pair(Reg, NonPHI)); - else - // Propagate to all inserted PHIs, not just VNI. - propagateSiblingValue(SVI); - } - - // Next work list item. - continue; - } - - MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); - assert(MI && "Missing def"); - - // Trace through sibling copies. - if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { - if (isSibling(SrcReg)) { - LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveQueryResult SrcQ = SrcLI.Query(VNI->def); - assert(SrcQ.valueIn() && "Copy from non-existing value"); - // Check if this COPY kills its source. - SVI->second.KillsSource = SrcQ.isKill(); - VNInfo *SrcVNI = SrcQ.valueIn(); - DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':' - << SrcVNI->id << '@' << SrcVNI->def - << " kill=" << unsigned(SVI->second.KillsSource) << '\n'); - // Known sibling source value? Try an insertion. - std::tie(SVI, Inserted) = SibValues.insert( - std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI))); - // This is the first time we see Src, add it to the worklist. - if (Inserted) - WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); - propagateSiblingValue(SVI, VNI); - // Next work list item. - continue; - } - } - - // Track reachable reloads. - SVI->second.DefMI = MI; - SVI->second.SpillMBB = MI->getParent(); - int FI; - if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) { - DEBUG(dbgs() << "reload\n"); - propagateSiblingValue(SVI); - // Next work list item. - continue; - } - - // Potential remat candidate. - DEBUG(dbgs() << "def " << *MI); - SVI->second.AllDefsAreReloads = false; - propagateSiblingValue(SVI); - } while (!WorkList.empty()); - - // Look up the value we were looking for. We already did this lookup at the - // top of the function, but SibValues may have been invalidated. - SVI = SibValues.find(UseVNI); - assert(SVI != SibValues.end() && "Didn't compute requested info"); - DEBUG(dbgs() << " traced to:\t" << SVI->second); - return SVI->second.DefMI; -} - -/// analyzeSiblingValues - Trace values defined by sibling copies back to -/// something that isn't a sibling copy. +/// This hoist only helps when the copy kills its source. /// -/// Keep track of values that may be rematerializable. -void InlineSpiller::analyzeSiblingValues() { - SibValues.clear(); - - // No siblings at all? - if (Edit->getReg() == Original) - return; - - LiveInterval &OrigLI = LIS.getInterval(Original); - for (unsigned Reg : RegsToSpill) { - LiveInterval &LI = LIS.getInterval(Reg); - for (LiveInterval::const_vni_iterator VI = LI.vni_begin(), - VE = LI.vni_end(); VI != VE; ++VI) { - VNInfo *VNI = *VI; - if (VNI->isUnused()) - continue; - MachineInstr *DefMI = nullptr; - if (!VNI->isPHIDef()) { - DefMI = LIS.getInstructionFromIndex(VNI->def); - assert(DefMI && "No defining instruction"); - } - // Check possible sibling copies. 
- if (VNI->isPHIDef() || DefMI->isCopy()) { - VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); - assert(OrigVNI && "Def outside original live range"); - if (OrigVNI->def != VNI->def) - DefMI = traceSiblingValue(Reg, VNI, OrigVNI); - } - if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) { - DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' - << VNI->def << " may remat from " << *DefMI); - } - } - } -} - -/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert -/// a spill at a better location. -bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { +bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI, + MachineInstr &CopyMI) { SlotIndex Idx = LIS.getInstructionIndex(CopyMI); +#ifndef NDEBUG VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); - SibValueMap::iterator I = SibValues.find(VNI); - if (I == SibValues.end()) - return false; - - const SibValueInfo &SVI = I->second; +#endif - // Let the normal folding code deal with the boring case. - if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI) + unsigned SrcReg = CopyMI.getOperand(1).getReg(); + LiveInterval &SrcLI = LIS.getInterval(SrcReg); + VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx); + LiveQueryResult SrcQ = SrcLI.Query(Idx); + MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(SrcVNI->def); + if (DefMBB != CopyMI.getParent() || !SrcQ.isKill()) return false; - // SpillReg may have been deleted by remat and DCE. - if (!LIS.hasInterval(SVI.SpillReg)) { - DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n'); - SibValues.erase(I); - return false; - } - - LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg); - if (!SibLI.containsValue(SVI.SpillVNI)) { - DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n'); - SibValues.erase(I); - return false; - } - // Conservatively extend the stack slot range to the range of the original // value. We may be able to do better with stack slot coloring by being more // careful here. @@ -719,35 +373,29 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " << *StackInt << '\n'); - // Already spilled everywhere. - if (SVI.AllDefsAreReloads) { - DEBUG(dbgs() << "\tno spill needed: " << SVI); - ++NumOmitReloadSpill; - return true; - } - // We are going to spill SVI.SpillVNI immediately after its def, so clear out + // We are going to spill SrcVNI immediately after its def, so clear out // any later spills of the same value. - eliminateRedundantSpills(SibLI, SVI.SpillVNI); + eliminateRedundantSpills(SrcLI, SrcVNI); - MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def); MachineBasicBlock::iterator MII; - if (SVI.SpillVNI->isPHIDef()) + if (SrcVNI->isPHIDef()) MII = MBB->SkipPHIsAndLabels(MBB->begin()); else { - MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def); assert(DefMI && "Defining instruction disappeared"); MII = DefMI; ++MII; } // Insert spill without kill flag immediately after def. - TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot, - MRI.getRegClass(SVI.SpillReg), &TRI); + TII.storeRegToStackSlot(*MBB, MII, SrcReg, false, StackSlot, + MRI.getRegClass(SrcReg), &TRI); --MII; // Point to store instruction. 
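hoistSpillInsideBB above inserts the hoisted spill immediately after the defining instruction, or after the leading PHIs and labels when the source value is a PHI def. A minimal sketch of that placement rule, with a std::list standing in for a machine basic block (Instr and spillInsertPoint are illustrative names, not the LLVM API):

#include <iterator>
#include <list>

struct Instr { bool IsPHIOrLabel = false; };
using Block = std::list<Instr>;

// Returns the position before which the spill should be inserted:
// right after the def, or after the leading PHIs/labels for a PHI def.
Block::iterator spillInsertPoint(Block &B, Block::iterator Def,
                                 bool IsPHIDef) {
  if (!IsPHIDef)
    return std::next(Def);            // spill immediately after the def
  Block::iterator It = B.begin();
  while (It != B.end() && It->IsPHIOrLabel)
    ++It;                             // skip the PHI/label block at the top
  return It;
}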
- LIS.InsertMachineInstrInMaps(MII); - DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); + LIS.InsertMachineInstrInMaps(*MII); + DEBUG(dbgs() << "\thoisted: " << SrcVNI->def << '\t' << *MII); + HSpiller.addToMergeableSpills(*MII, StackSlot, Original); ++NumSpills; - ++NumHoists; return true; } @@ -778,8 +426,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { for (MachineRegisterInfo::use_instr_nodbg_iterator UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end(); UI != E; ) { - MachineInstr *MI = &*(UI++); - if (!MI->isCopy() && !MI->mayStore()) + MachineInstr &MI = *UI++; + if (!MI.isCopy() && !MI.mayStore()) continue; SlotIndex Idx = LIS.getInstructionIndex(MI); if (LI->getVNInfoAt(Idx) != VNI) @@ -800,12 +448,13 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { // Erase spills. int FI; if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) { - DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI); + DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << MI); // eliminateDeadDefs won't normally remove stores, so switch opcode. - MI->setDesc(TII.get(TargetOpcode::KILL)); - DeadDefs.push_back(MI); + MI.setDesc(TII.get(TargetOpcode::KILL)); + DeadDefs.push_back(&MI); ++NumSpillsRemoved; - --NumSpills; + if (HSpiller.rmFromMergeableSpills(MI, StackSlot)) + --NumSpills; } } } while (!WorkList.empty()); @@ -849,13 +498,12 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { } /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. -bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, - MachineBasicBlock::iterator MI) { +bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // Analyze instruction SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops; MIBundleOperands::VirtRegInfo RI = - MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); + MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); if (!RI.Reads) return false; @@ -865,26 +513,26 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, if (!ParentVNI) { DEBUG(dbgs() << "\tadding <undef> flags: "); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) MO.setIsUndef(); } - DEBUG(dbgs() << UseIdx << '\t' << *MI); + DEBUG(dbgs() << UseIdx << '\t' << MI); return true; } - if (SnippetCopies.count(MI)) + if (SnippetCopies.count(&MI)) return false; - // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy. + LiveInterval &OrigLI = LIS.getInterval(Original); + VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx); LiveRangeEdit::Remat RM(ParentVNI); - SibValueMap::const_iterator SibI = SibValues.find(ParentVNI); - if (SibI != SibValues.end()) - RM.OrigMI = SibI->second.DefMI; - if (!Edit->canRematerializeAt(RM, UseIdx, false)) { + RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def); + + if (!Edit->canRematerializeAt(RM, OrigVNI, UseIdx, false)) { markValueUsed(&VirtReg, ParentVNI); - DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); + DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI); return false; } @@ -892,7 +540,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // same register for uses and defs. 
   if (RI.Tied) {
     markValueUsed(&VirtReg, ParentVNI);
-    DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+    DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << MI);
     return false;
   }
@@ -909,8 +557,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
   unsigned NewVReg = Edit->createFrom(Original);

   // Finally we can rematerialize OrigMI before MI.
-  SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewVReg, RM,
-                                           TRI);
+  SlotIndex DefIdx =
+      Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
   (void)DefIdx;
   DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
                << *LIS.getInstructionFromIndex(DefIdx));
@@ -923,7 +571,7 @@
       MO.setIsKill();
     }
   }
-  DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI << '\n');
+  DEBUG(dbgs() << "\t " << UseIdx << '\t' << MI << '\n');

   ++NumRemats;
   return true;
@@ -932,7 +580,6 @@
 /// reMaterializeAll - Try to rematerialize as many uses as possible,
 /// and trim the live ranges after.
 void InlineSpiller::reMaterializeAll() {
-  // analyzeSiblingValues has already tested all relevant defining instructions.
   if (!Edit->anyRematerializable(AA))
     return;

@@ -945,10 +592,10 @@ void InlineSpiller::reMaterializeAll() {
     for (MachineRegisterInfo::reg_bundle_iterator
          RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
          RegI != E; ) {
-      MachineInstr *MI = &*(RegI++);
+      MachineInstr &MI = *RegI++;

       // Debug values are not allowed to affect codegen.
-      if (MI->isDebugValue())
+      if (MI.isDebugValue())
         continue;

       anyRemat |= reMaterializeFor(LI, MI);
@@ -979,20 +626,22 @@
   if (DeadDefs.empty())
     return;
   DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
-  Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
-
-  // Get rid of deleted and empty intervals.
+  Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
+
+  // LiveRangeEdit::eliminateDeadDef is used to remove dead define instructions
+  // after rematerialization. To remove a VNI for a vreg from its LiveInterval,
+  // LiveIntervals::removeVRegDefAt is used. However, after all non-PHI VNIs
+  // are removed, PHI VNIs are still left in the LiveInterval.
+  // So to get rid of an unused reg, we need to check whether it has a non-dbg
+  // reference instead of whether it has a non-empty interval.
   unsigned ResultPos = 0;
   for (unsigned Reg : RegsToSpill) {
-    if (!LIS.hasInterval(Reg))
-      continue;
-
-    LiveInterval &LI = LIS.getInterval(Reg);
-    if (LI.empty()) {
+    if (MRI.reg_nodbg_empty(Reg)) {
       Edit->eraseVirtReg(Reg);
       continue;
     }
-
+    assert((LIS.hasInterval(Reg) && !LIS.getInterval(Reg).empty()) &&
+           "Reg with empty interval has reference");
     RegsToSpill[ResultPos++] = Reg;
   }
   RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
@@ -1007,17 +656,20 @@
 /// If MI is a load or store of StackSlot, it can be removed.
 bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
   int FI = 0;
-  unsigned InstrReg = TII.isLoadFromStackSlot(MI, FI);
+  unsigned InstrReg = TII.isLoadFromStackSlot(*MI, FI);
   bool IsLoad = InstrReg;
   if (!IsLoad)
-    InstrReg = TII.isStoreToStackSlot(MI, FI);
+    InstrReg = TII.isStoreToStackSlot(*MI, FI);

   // We have a stack access. Is it the right register and slot?
if (InstrReg != Reg || FI != StackSlot) return false; + if (!IsLoad) + HSpiller.rmFromMergeableSpills(*MI, StackSlot); + DEBUG(dbgs() << "Coalescing stack access: " << *MI); - LIS.RemoveMachineInstrFromMaps(MI); + LIS.RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); if (IsLoad) { @@ -1049,7 +701,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, dbgs() << '\t' << header << ": " << NextLine; for (MachineBasicBlock::iterator I = B; I != E; ++I) { - SlotIndex Idx = LIS.getInstructionIndex(I).getRegSlot(); + SlotIndex Idx = LIS.getInstructionIndex(*I).getRegSlot(); // If a register was passed in and this instruction has it as a // destination that is marked as an early clobber, print the @@ -1113,13 +765,13 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, MachineInstrSpan MIS(MI); MachineInstr *FoldMI = - LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) - : TII.foldMemoryOperand(MI, FoldOps, StackSlot); + LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) + : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS); if (!FoldMI) return false; // Remove LIS for any dead defs in the original MI not in FoldMI. - for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { + for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) { if (!MO->isReg()) continue; unsigned Reg = MO->getReg(); @@ -1131,23 +783,27 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, if (MO->isUse()) continue; MIBundleOperands::PhysRegInfo RI = - MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); + MIBundleOperands(*FoldMI).analyzePhysReg(Reg, &TRI); if (RI.FullyDefined) continue; // FoldMI does not define this physreg. Remove the LI segment. assert(MO->isDead() && "Cannot fold physreg def"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot(); LIS.removePhysRegDefAt(Reg, Idx); } - LIS.ReplaceMachineInstrInMaps(MI, FoldMI); + int FI; + if (TII.isStoreToStackSlot(*MI, FI) && + HSpiller.rmFromMergeableSpills(*MI, FI)) + --NumSpills; + LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI); MI->eraseFromParent(); // Insert any new instructions other than FoldMI into the LIS maps. assert(!MIS.empty() && "Unexpected empty span of instructions!"); for (MachineInstr &MI : MIS) if (&MI != FoldMI) - LIS.InsertMachineInstrInMaps(&MI); + LIS.InsertMachineInstrInMaps(MI); // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. @@ -1165,9 +821,10 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, if (!WasCopy) ++NumFolded; - else if (Ops.front().second == 0) + else if (Ops.front().second == 0) { ++NumSpills; - else + HSpiller.addToMergeableSpills(*FoldMI, StackSlot, Original); + } else ++NumReloads; return true; } @@ -1202,6 +859,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, "spill")); ++NumSpills; + HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original); } /// spillAroundUses - insert spill code around each use of Reg. @@ -1246,17 +904,17 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Analyze instruction. SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; MIBundleOperands::VirtRegInfo RI = - MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops); + MIBundleOperands(*MI).analyzeVirtReg(Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. 
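In foldMemoryOperand above, folding a full COPY whose register operand is the destination (operand index 0) turns the copy into a stack store, so it is counted as a spill and registered as mergeable; other folds count as plain folds or reloads. A minimal sketch of that classification (FoldKind and classifyFold are illustrative names):

enum class FoldKind { Folded, Spill, Reload };

// WasCopy: the folded instruction was a full COPY.
// OpIdx: index of the folded register operand (0 == the copy's def).
FoldKind classifyFold(bool WasCopy, unsigned OpIdx) {
  if (!WasCopy)
    return FoldKind::Folded;            // generic fold, ++NumFolded
  return OpIdx == 0 ? FoldKind::Spill   // copy def folded into a store
                    : FoldKind::Reload; // copy use folded into a load
}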
-    SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+    SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
     if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
       if (SlotIndex::isSameInstr(Idx, VNI->def))
         Idx = VNI->def;

     // Check for a sibling copy.
-    unsigned SibReg = isFullCopyOf(MI, Reg);
+    unsigned SibReg = isFullCopyOf(*MI, Reg);
     if (SibReg && isSibling(SibReg)) {
       // This may actually be a copy between snippets.
       if (isRegToSpill(SibReg)) {
@@ -1265,8 +923,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
         continue;
       }
       if (RI.Writes) {
-        // Hoist the spill of a sib-reg copy.
-        if (hoistSpill(OldLI, MI)) {
+        if (hoistSpillInsideBB(OldLI, *MI)) {
           // This COPY is now dead, the value is already in the stack slot.
           MI->getOperand(0).setIsDead();
           DeadDefs.push_back(MI);
@@ -1339,7 +996,7 @@ void InlineSpiller::spillAll() {
   // Hoisted spills may cause dead code.
   if (!DeadDefs.empty()) {
     DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
-    Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
+    Edit->eliminateDeadDefs(DeadDefs, RegsToSpill, AA);
   }

   // Finally delete the SnippetCopies.
@@ -1347,11 +1004,11 @@
     for (MachineRegisterInfo::reg_instr_iterator RI = MRI.reg_instr_begin(Reg),
          E = MRI.reg_instr_end(); RI != E; ) {
-      MachineInstr *MI = &*(RI++);
-      assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
+      MachineInstr &MI = *(RI++);
+      assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
       // FIXME: Do this with a LiveRangeEdit callback.
       LIS.RemoveMachineInstrFromMaps(MI);
-      MI->eraseFromParent();
+      MI.eraseFromParent();
     }
   }

@@ -1379,7 +1036,6 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
   assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");

   collectRegsToSpill();
-  analyzeSiblingValues();
   reMaterializeAll();

   // Remat may handle everything.
@@ -1388,3 +1044,413 @@
   Edit->calculateRegClassAndHint(MF, Loops, MBFI);
 }
+
+/// Optimizations after all the reg selections and spills are done.
+///
+void InlineSpiller::postOptimization() { HSpiller.hoistAllSpills(); }
+
+/// When a spill is inserted, add the spill to the MergeableSpills map.
+///
+void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
+                                            unsigned Original) {
+  StackSlotToReg[StackSlot] = Original;
+  SlotIndex Idx = LIS.getInstructionIndex(Spill);
+  VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+  std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
+  MergeableSpills[MIdx].insert(&Spill);
+}
+
+/// When a spill is removed, remove the spill from the MergeableSpills map.
+/// Return true if the spill is removed successfully.
+///
+bool HoistSpillHelper::rmFromMergeableSpills(MachineInstr &Spill,
+                                             int StackSlot) {
+  int Original = StackSlotToReg[StackSlot];
+  if (!Original)
+    return false;
+  SlotIndex Idx = LIS.getInstructionIndex(Spill);
+  VNInfo *OrigVNI = LIS.getInterval(Original).getVNInfoAt(Idx.getRegSlot());
+  std::pair<int, VNInfo *> MIdx = std::make_pair(StackSlot, OrigVNI);
+  return MergeableSpills[MIdx].erase(&Spill);
+}
+
+/// Check BB to see if it is a possible target BB to place a hoisted spill,
+/// i.e., there should be a live sibling of OrigReg at the insert point.
+///
+bool HoistSpillHelper::isSpillCandBB(unsigned OrigReg, VNInfo &OrigVNI,
+                                     MachineBasicBlock &BB, unsigned &LiveReg) {
+  SlotIndex Idx;
+  LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+  MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, BB);
+  if (MI != BB.end())
+    Idx = LIS.getInstructionIndex(*MI);
+  else
+    Idx = LIS.getMBBEndIdx(&BB).getPrevSlot();
+  SmallSetVector<unsigned, 16> &Siblings = Virt2SiblingsMap[OrigReg];
+  assert((LIS.getInterval(OrigReg)).getVNInfoAt(Idx) == &OrigVNI &&
+         "Unexpected VNI");
+
+  for (auto const SibReg : Siblings) {
+    LiveInterval &LI = LIS.getInterval(SibReg);
+    VNInfo *VNI = LI.getVNInfoAt(Idx);
+    if (VNI) {
+      LiveReg = SibReg;
+      return true;
+    }
+  }
+  return false;
+}
+
+/// Remove redundant spills in the same BB. Save those redundant spills in
+/// SpillsToRm, and save the spill to keep and its BB in the SpillBBToSpill
+/// map.
+///
+void HoistSpillHelper::rmRedundantSpills(
+    SmallPtrSet<MachineInstr *, 16> &Spills,
+    SmallVectorImpl<MachineInstr *> &SpillsToRm,
+    DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
+  // For each spill seen, check SpillBBToSpill[] and see if its BB already has
+  // another spill inside. If a BB contains more than one spill, only keep the
+  // earlier spill with smaller SlotIndex.
+  for (const auto CurrentSpill : Spills) {
+    MachineBasicBlock *Block = CurrentSpill->getParent();
+    MachineDomTreeNode *Node = MDT.DT->getNode(Block);
+    MachineInstr *PrevSpill = SpillBBToSpill[Node];
+    if (PrevSpill) {
+      SlotIndex PIdx = LIS.getInstructionIndex(*PrevSpill);
+      SlotIndex CIdx = LIS.getInstructionIndex(*CurrentSpill);
+      MachineInstr *SpillToRm = (CIdx > PIdx) ? CurrentSpill : PrevSpill;
+      MachineInstr *SpillToKeep = (CIdx > PIdx) ? PrevSpill : CurrentSpill;
+      SpillsToRm.push_back(SpillToRm);
+      SpillBBToSpill[MDT.DT->getNode(Block)] = SpillToKeep;
+    } else {
+      SpillBBToSpill[MDT.DT->getNode(Block)] = CurrentSpill;
+    }
+  }
+  for (const auto SpillToRm : SpillsToRm)
+    Spills.erase(SpillToRm);
+}
+
+/// Starting from \p Root find a top-down traversal order of the dominator
+/// tree to visit all basic blocks containing the elements of \p Spills.
+/// Redundant spills will be found and put into \p SpillsToRm at the same
+/// time. \p SpillBBToSpill will be populated as part of the process and
+/// maps a basic block to the first store occurring in the basic block.
+/// \post SpillsToRm.union(Spills\@post) == Spills\@pre
+///
+void HoistSpillHelper::getVisitOrders(
+    MachineBasicBlock *Root, SmallPtrSet<MachineInstr *, 16> &Spills,
+    SmallVectorImpl<MachineDomTreeNode *> &Orders,
+    SmallVectorImpl<MachineInstr *> &SpillsToRm,
+    DenseMap<MachineDomTreeNode *, unsigned> &SpillsToKeep,
+    DenseMap<MachineDomTreeNode *, MachineInstr *> &SpillBBToSpill) {
+  // The set contains all the possible BB nodes to which we may hoist
+  // original spills.
+  SmallPtrSet<MachineDomTreeNode *, 8> WorkSet;
+  // Save the BB nodes on the path from the first BB node containing a
+  // non-redundant spill to the Root node.
+  SmallPtrSet<MachineDomTreeNode *, 8> NodesOnPath;
+  // All the spills to be hoisted must originate from a single def instruction
+  // to the OrigReg. It means the def instruction should dominate all the
+  // spills to be hoisted. We choose the BB where the def instruction is
+  // located as the Root.
+  MachineDomTreeNode *RootIDomNode = MDT[Root]->getIDom();
+  // For every node on the dominator tree with a spill, walk up the dominator
+  // tree towards the Root node until it is reached.
+  // If there is another node containing a spill in the middle of the path,
+  // the spill seen earlier will be redundant and the node containing it will
+  // be removed. All the nodes on the path starting from the first node with a
+  // non-redundant spill to the Root node will be added to the WorkSet, which
+  // will contain all the possible locations where spills may be hoisted to
+  // after the loop below is done.
+  for (const auto Spill : Spills) {
+    MachineBasicBlock *Block = Spill->getParent();
+    MachineDomTreeNode *Node = MDT[Block];
+    MachineInstr *SpillToRm = nullptr;
+    while (Node != RootIDomNode) {
+      // If Node dominates Block, and it already contains a spill, the spill in
+      // Block will be redundant.
+      if (Node != MDT[Block] && SpillBBToSpill[Node]) {
+        SpillToRm = SpillBBToSpill[MDT[Block]];
+        break;
+        /// If we see the Node already in WorkSet, the path from the Node to
+        /// the Root node must already have been traversed by another spill.
+        /// Then there is no need to repeat it.
+      } else if (WorkSet.count(Node)) {
+        break;
+      } else {
+        NodesOnPath.insert(Node);
+      }
+      Node = Node->getIDom();
+    }
+    if (SpillToRm) {
+      SpillsToRm.push_back(SpillToRm);
+    } else {
+      // Add a BB containing the original spills to SpillsToKeep -- i.e.,
+      // set the initial status before hoisting starts. The value of BBs
+      // containing original spills is set to 0, in order to discriminate
+      // them from BBs containing hoisted spills, which will be inserted into
+      // SpillsToKeep later during hoisting.
+      SpillsToKeep[MDT[Block]] = 0;
+      WorkSet.insert(NodesOnPath.begin(), NodesOnPath.end());
+    }
+    NodesOnPath.clear();
+  }
+
+  // Sort the nodes in WorkSet in top-down order and save the nodes
+  // in Orders. Orders will be used for hoisting in runHoistSpills.
+  unsigned idx = 0;
+  Orders.push_back(MDT.DT->getNode(Root));
+  do {
+    MachineDomTreeNode *Node = Orders[idx++];
+    const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
+    unsigned NumChildren = Children.size();
+    for (unsigned i = 0; i != NumChildren; ++i) {
+      MachineDomTreeNode *Child = Children[i];
+      if (WorkSet.count(Child))
+        Orders.push_back(Child);
+    }
+  } while (idx != Orders.size());
+  assert(Orders.size() == WorkSet.size() &&
+         "Orders have a different size than WorkSet");
+
+#ifndef NDEBUG
+  DEBUG(dbgs() << "Orders size is " << Orders.size() << "\n");
+  SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
+  for (; RIt != Orders.rend(); RIt++)
+    DEBUG(dbgs() << "BB" << (*RIt)->getBlock()->getNumber() << ",");
+  DEBUG(dbgs() << "\n");
+#endif
+}
+
+/// Try to hoist spills according to BB hotness. The spills to be removed will
+/// be saved in \p SpillsToRm. The spills to be inserted will be saved in
+/// \p SpillsToIns.
+///
+void HoistSpillHelper::runHoistSpills(
+    unsigned OrigReg, VNInfo &OrigVNI, SmallPtrSet<MachineInstr *, 16> &Spills,
+    SmallVectorImpl<MachineInstr *> &SpillsToRm,
+    DenseMap<MachineBasicBlock *, unsigned> &SpillsToIns) {
+  // Visit order of dominator tree nodes.
+  SmallVector<MachineDomTreeNode *, 32> Orders;
+  // SpillsToKeep contains all the nodes where spills are to be inserted
+  // during hoisting. If the spill to be inserted is an original spill
+  // (not a hoisted one), the value of the map entry is 0. If the spill
+  // is a hoisted spill, the value of the map entry is the VReg to be used
+  // as the source of the spill.
+  DenseMap<MachineDomTreeNode *, unsigned> SpillsToKeep;
+  // Map from BB to the first spill inside of it.
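The visit-order loop in getVisitOrders above is a simple worklist: the root goes in first and a child is appended only if it belongs to the candidate set, so parents always precede their children. A minimal sketch with STL containers standing in for the dominator tree (Node and topDownOrder are illustrative names):

#include <set>
#include <vector>

struct Node { std::vector<Node *> Children; };  // stand-in dom-tree node

std::vector<Node *> topDownOrder(Node *Root, const std::set<Node *> &WorkSet) {
  std::vector<Node *> Orders{Root};
  for (size_t Idx = 0; Idx != Orders.size(); ++Idx)
    for (Node *Child : Orders[Idx]->Children)
      if (WorkSet.count(Child))
        Orders.push_back(Child);  // a parent is always visited first
  return Orders;
}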
+  DenseMap<MachineDomTreeNode *, MachineInstr *> SpillBBToSpill;
+
+  rmRedundantSpills(Spills, SpillsToRm, SpillBBToSpill);
+
+  MachineBasicBlock *Root = LIS.getMBBFromIndex(OrigVNI.def);
+  getVisitOrders(Root, Spills, Orders, SpillsToRm, SpillsToKeep,
+                 SpillBBToSpill);
+
+  // SpillsInSubTreeMap keeps the map from a dom tree node to a pair of
+  // a node set and the cost of all the spills inside those nodes.
+  // The node set contains the locations where spills are to be inserted
+  // in the subtree of the current node.
+  typedef std::pair<SmallPtrSet<MachineDomTreeNode *, 16>, BlockFrequency>
+      NodesCostPair;
+  DenseMap<MachineDomTreeNode *, NodesCostPair> SpillsInSubTreeMap;
+  // Iterate the Orders set in reverse order, which will be a bottom-up order
+  // in the dominator tree. Once we visit a dom tree node, we know its
+  // children have already been visited and the spill locations in the
+  // subtrees of all the children have been determined.
+  SmallVector<MachineDomTreeNode *, 32>::reverse_iterator RIt = Orders.rbegin();
+  for (; RIt != Orders.rend(); RIt++) {
+    MachineBasicBlock *Block = (*RIt)->getBlock();
+
+    // If Block contains an original spill, simply continue.
+    if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) {
+      SpillsInSubTreeMap[*RIt].first.insert(*RIt);
+      // SpillsInSubTreeMap[*RIt].second contains the cost of the spill.
+      SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
+      continue;
+    }
+
+    // Collect spills in the subtree of the current node (*RIt) into
+    // SpillsInSubTreeMap[*RIt].first.
+    const std::vector<MachineDomTreeNode *> &Children = (*RIt)->getChildren();
+    unsigned NumChildren = Children.size();
+    for (unsigned i = 0; i != NumChildren; ++i) {
+      MachineDomTreeNode *Child = Children[i];
+      if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end())
+        continue;
+      // The stmt "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below
+      // should be placed before getting the begin and end iterators of
+      // SpillsInSubTreeMap[Child].first, or else the iterators may be
+      // invalidated when SpillsInSubTreeMap[*RIt] is seen the first time
+      // and the map grows and then the original buckets in the map are moved.
+      SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
+          SpillsInSubTreeMap[*RIt].first;
+      BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
+      SubTreeCost += SpillsInSubTreeMap[Child].second;
+      auto BI = SpillsInSubTreeMap[Child].first.begin();
+      auto EI = SpillsInSubTreeMap[Child].first.end();
+      SpillsInSubTree.insert(BI, EI);
+      SpillsInSubTreeMap.erase(Child);
+    }
+
+    SmallPtrSet<MachineDomTreeNode *, 16> &SpillsInSubTree =
+        SpillsInSubTreeMap[*RIt].first;
+    BlockFrequency &SubTreeCost = SpillsInSubTreeMap[*RIt].second;
+    // No spills in subtree, simply continue.
+    if (SpillsInSubTree.empty())
+      continue;
+
+    // Check whether Block is a possible candidate to insert a spill.
+    unsigned LiveReg = 0;
+    if (!isSpillCandBB(OrigReg, OrigVNI, *Block, LiveReg))
+      continue;
+
+    // If there are multiple spills that could be merged, bias a little
+    // towards hoisting the spill.
+    BranchProbability MarginProb = (SpillsInSubTree.size() > 1)
+                                       ? BranchProbability(9, 10)
+                                       : BranchProbability(1, 1);
+    if (SubTreeCost > MBFI.getBlockFreq(Block) * MarginProb) {
+      // Hoist: Move spills to the current Block.
+      for (const auto SpillBB : SpillsInSubTree) {
+        // When SpillBB is a BB that contains an original spill, insert the
+        // spill into SpillsToRm.
+        if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
+            !SpillsToKeep[SpillBB]) {
+          MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
+          SpillsToRm.push_back(SpillToRm);
+        }
+        // SpillBB will no longer contain a spill, so remove it from
+        // SpillsToKeep.
+        SpillsToKeep.erase(SpillBB);
+      }
+      // The current Block is the BB containing the new hoisted spill. Add it
+      // to SpillsToKeep. LiveReg is the source of the new spill.
+      SpillsToKeep[*RIt] = LiveReg;
+      DEBUG({
+        dbgs() << "spills in BB: ";
+        for (const auto Rspill : SpillsInSubTree)
+          dbgs() << Rspill->getBlock()->getNumber() << " ";
+        dbgs() << "were promoted to BB" << (*RIt)->getBlock()->getNumber()
+               << "\n";
+      });
+      SpillsInSubTree.clear();
+      SpillsInSubTree.insert(*RIt);
+      SubTreeCost = MBFI.getBlockFreq(Block);
+    }
+  }
+  // For spills in SpillsToKeep with LiveReg set (i.e., not an original spill),
+  // save them to SpillsToIns.
+  for (const auto Ent : SpillsToKeep) {
+    if (Ent.second)
+      SpillsToIns[Ent.first->getBlock()] = Ent.second;
+  }
+}
+
+/// For spills with equal values, remove redundant spills and hoist those left
+/// to less hot spots.
+///
+/// Spills with equal values will be collected into the same set in
+/// MergeableSpills when each spill is inserted. These equal spills originate
+/// from the same defining instruction and are dominated by that instruction.
+/// Before hoisting all the equal spills, redundant spills inside the same
+/// BB are first marked to be deleted. Then, starting from the spills left,
+/// walk up the dominator tree towards the Root node, where the defining
+/// instruction is located, mark the dominated spills to be deleted along the
+/// way, and collect the BB nodes on the path from the non-dominated spills to
+/// the defining instruction into a WorkSet. The nodes in the WorkSet are the
+/// candidate places where we consider hoisting the spills. We iterate the
+/// WorkSet in bottom-up order, and for each node we decide whether to hoist
+/// the spills inside its subtree to that node. In this way, we can get a
+/// local benefit even if hoisting all the equal spills to one cold place is
+/// impossible.
+///
+void HoistSpillHelper::hoistAllSpills() {
+  SmallVector<unsigned, 4> NewVRegs;
+  LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);
+
+  // Save the mapping between each stackslot and its original reg.
+  DenseMap<int, unsigned> SlotToOrigReg;
+  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    int Slot = VRM.getStackSlot(Reg);
+    if (Slot != VirtRegMap::NO_STACK_SLOT)
+      SlotToOrigReg[Slot] = VRM.getOriginal(Reg);
+    unsigned Original = VRM.getPreSplitReg(Reg);
+    if (!MRI.def_empty(Reg))
+      Virt2SiblingsMap[Original].insert(Reg);
+  }
+
+  // Each entry in MergeableSpills contains a spill set with equal values.
+  for (auto &Ent : MergeableSpills) {
+    int Slot = Ent.first.first;
+    unsigned OrigReg = SlotToOrigReg[Slot];
+    LiveInterval &OrigLI = LIS.getInterval(OrigReg);
+    VNInfo *OrigVNI = Ent.first.second;
+    SmallPtrSet<MachineInstr *, 16> &EqValSpills = Ent.second;
+    if (Ent.second.empty())
+      continue;
+
+    DEBUG({
+      dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n"
+             << "Equal spills in BB: ";
+      for (const auto spill : EqValSpills)
+        dbgs() << spill->getParent()->getNumber() << " ";
+      dbgs() << "\n";
+    });
+
+    // SpillsToRm is the spill set to be removed from EqValSpills.
+    SmallVector<MachineInstr *, 16> SpillsToRm;
+    // SpillsToIns is the spill set to be newly inserted after hoisting.
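The margin test in runHoistSpills above hoists when the accumulated block frequency of the spills in a subtree exceeds the frequency of the candidate block, with a 90% bias applied only when more than one spill would be merged. A minimal sketch using plain doubles in place of BlockFrequency and BranchProbability (shouldHoist is an illustrative name):

// Hoist when the spills below are collectively hotter than one spill here.
bool shouldHoist(double SubTreeCost, double CandidateBlockFreq,
                 unsigned NumSpillsInSubTree) {
  double Margin = (NumSpillsInSubTree > 1) ? 0.9 : 1.0;
  return SubTreeCost > CandidateBlockFreq * Margin;
}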
+    DenseMap<MachineBasicBlock *, unsigned> SpillsToIns;
+
+    runHoistSpills(OrigReg, *OrigVNI, EqValSpills, SpillsToRm, SpillsToIns);
+
+    DEBUG({
+      dbgs() << "Finally inserted spills in BB: ";
+      for (const auto Ispill : SpillsToIns)
+        dbgs() << Ispill.first->getNumber() << " ";
+      dbgs() << "\nFinally removed spills in BB: ";
+      for (const auto Rspill : SpillsToRm)
+        dbgs() << Rspill->getParent()->getNumber() << " ";
+      dbgs() << "\n";
+    });
+
+    // Stack live range update.
+    LiveInterval &StackIntvl = LSS.getInterval(Slot);
+    if (!SpillsToIns.empty() || !SpillsToRm.empty())
+      StackIntvl.MergeValueInAsValue(OrigLI, OrigVNI,
+                                     StackIntvl.getValNumInfo(0));
+
+    // Insert hoisted spills.
+    for (auto const Insert : SpillsToIns) {
+      MachineBasicBlock *BB = Insert.first;
+      unsigned LiveReg = Insert.second;
+      MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB);
+      TII.storeRegToStackSlot(*BB, MI, LiveReg, false, Slot,
+                              MRI.getRegClass(LiveReg), &TRI);
+      LIS.InsertMachineInstrRangeInMaps(std::prev(MI), MI);
+      ++NumSpills;
+    }
+
+    // Remove redundant spills or change them to dead instructions.
+    NumSpills -= SpillsToRm.size();
+    for (auto const RMEnt : SpillsToRm) {
+      RMEnt->setDesc(TII.get(TargetOpcode::KILL));
+      for (unsigned i = RMEnt->getNumOperands(); i; --i) {
+        MachineOperand &MO = RMEnt->getOperand(i - 1);
+        if (MO.isReg() && MO.isImplicit() && MO.isDef() && !MO.isDead())
+          RMEnt->RemoveOperand(i - 1);
+      }
+    }
+    Edit.eliminateDeadDefs(SpillsToRm, None, AA);
+  }
+}
+
+/// For a VirtReg clone, the \p New register should have the same physreg or
+/// stackslot as the \p Old register.
+void HoistSpillHelper::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
+  if (VRM.hasPhys(Old))
+    VRM.assignVirt2Phys(New, VRM.getPhys(Old));
+  else if (VRM.getStackSlot(Old) != VirtRegMap::NO_STACK_SLOT)
+    VRM.assignVirt2StackSlot(New, VRM.getStackSlot(Old));
+  else
+    llvm_unreachable("VReg should be assigned either physreg or stackslot");
+}
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 724f1d6..3f11119 100644
--- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -1,6 +1,6 @@
-//=----------------------- InterleavedAccessPass.cpp -----------------------==//
+//===--------------------- InterleavedAccessPass.cpp ----------------------===//
 //
-// The LLVM Compiler Infrastructure
+// The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
@@ -8,16 +8,18 @@
 //===----------------------------------------------------------------------===//
 //
 // This file implements the Interleaved Access pass, which identifies
-// interleaved memory accesses and transforms into target specific intrinsics.
+// interleaved memory accesses and transforms them into target specific
+// intrinsics.
 //
 // An interleaved load reads data from memory into several vectors, with
 // DE-interleaving the data on a factor. An interleaved store writes several
 // vectors to memory with RE-interleaving the data on a factor.
 //
-// As interleaved accesses are hard to be identified in CodeGen (mainly because
-// the VECTOR_SHUFFLE DAG node is quite different from the shufflevector IR),
-// we identify and transform them to intrinsics in this pass. So the intrinsics
-// can be easily matched into target specific instructions later in CodeGen.
+// As interleaved accesses are difficult to identify in CodeGen (mainly
+// because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector
+// IR), we identify and transform them to intrinsics in this pass so the
+// intrinsics can be easily matched into target specific instructions later in
+// CodeGen.
 //
 // E.g. An interleaved load (Factor = 2):
 // %wide.vec = load <8 x i32>, <8 x i32>* %ptr
@@ -38,6 +40,7 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
@@ -56,10 +59,6 @@ static cl::opt<bool> LowerInterleavedAccesses(

 static unsigned MaxFactor; // The maximum supported interleave factor.

-namespace llvm {
-static void initializeInterleavedAccessPass(PassRegistry &);
-}
-
 namespace {

 class InterleavedAccess : public FunctionPass {
@@ -67,7 +66,7 @@ class InterleavedAccess : public FunctionPass {
 public:
   static char ID;
   InterleavedAccess(const TargetMachine *TM = nullptr)
-      : FunctionPass(ID), TM(TM), TLI(nullptr) {
+      : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) {
     initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
   }

@@ -75,7 +74,13 @@ public:

   bool runOnFunction(Function &F) override;

+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+
 private:
+  DominatorTree *DT;
   const TargetMachine *TM;
   const TargetLowering *TLI;

@@ -86,13 +91,26 @@ private:
   /// \brief Transform an interleaved store into target specific intrinsics.
   bool lowerInterleavedStore(StoreInst *SI,
                              SmallVector<Instruction *, 32> &DeadInsts);
+
+  /// \brief Returns true if the uses of an interleaved load by the
+  /// extractelement instructions in \p Extracts can be replaced by uses of the
+  /// shufflevector instructions in \p Shuffles instead. If so, the necessary
+  /// replacements are also performed.
+  bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
+                          ArrayRef<ShuffleVectorInst *> Shuffles);
 };
 } // end anonymous namespace.

 char InterleavedAccess::ID = 0;
-INITIALIZE_TM_PASS(InterleavedAccess, "interleaved-access",
-    "Lower interleaved memory accesses to target specific intrinsics",
-    false, false)
+INITIALIZE_TM_PASS_BEGIN(
+    InterleavedAccess, "interleaved-access",
+    "Lower interleaved memory accesses to target specific intrinsics", false,
+    false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_TM_PASS_END(
+    InterleavedAccess, "interleaved-access",
+    "Lower interleaved memory accesses to target specific intrinsics", false,
+    false)

 FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
   return new InterleavedAccess(TM);
@@ -181,9 +199,18 @@ bool InterleavedAccess::lowerInterleavedLoad(
     return false;

   SmallVector<ShuffleVectorInst *, 4> Shuffles;
+  SmallVector<ExtractElementInst *, 4> Extracts;

-  // Check if all users of this load are shufflevectors.
+  // Check if all users of this load are shufflevectors. If we encounter any
+  // users that are extractelement instructions, we save them to later check if
+  // they can be modified to extract from one of the shufflevectors instead of
+  // the load.
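tryReplaceExtracts (declared above, defined below) rewrites an extractelement of the wide load to read from a de-interleaving shufflevector whenever some position of the shuffle mask selects the extracted lane. A minimal sketch of that matching step over a plain mask vector (findShuffleLane is an illustrative name, not the pass's API):

#include <vector>

// Returns the shuffle-result index whose mask entry selects ExtractedLane,
// or -1 when no shuffle element reads that lane (the rewrite must give up).
int findShuffleLane(const std::vector<int> &Mask, int ExtractedLane) {
  for (size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I] == ExtractedLane)
      return static_cast<int>(I);
  return -1;
}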
   for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
+    auto *Extract = dyn_cast<ExtractElementInst>(*UI);
+    if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
+      Extracts.push_back(Extract);
+      continue;
+    }
     ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
     if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
       return false;
@@ -219,6 +246,11 @@ bool InterleavedAccess::lowerInterleavedLoad(
     Indices.push_back(Index);
   }

+  // Try to modify users of the load that are extractelement instructions to
+  // use the shufflevector instructions instead of the load.
+  if (!tryReplaceExtracts(Extracts, Shuffles))
+    return false;
+
   DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");

   // Try to create target specific intrinsics to replace the load and shuffles.
@@ -232,6 +264,73 @@
   return true;
 }

+bool InterleavedAccess::tryReplaceExtracts(
+    ArrayRef<ExtractElementInst *> Extracts,
+    ArrayRef<ShuffleVectorInst *> Shuffles) {
+
+  // If there aren't any extractelement instructions to modify, there's nothing
+  // to do.
+  if (Extracts.empty())
+    return true;
+
+  // Maps extractelement instructions to vector-index pairs. The extractelement
+  // instructions will be modified to use the new vector and index operands.
+  DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
+
+  for (auto *Extract : Extracts) {
+
+    // The vector index that is extracted.
+    auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
+    auto Index = IndexOperand->getSExtValue();
+
+    // Look for a suitable shufflevector instruction. The goal is to modify the
+    // extractelement instruction (which uses an interleaved load) to use one
+    // of the shufflevector instructions instead of the load.
+    for (auto *Shuffle : Shuffles) {
+
+      // If the shufflevector instruction doesn't dominate the extract, we
+      // can't create a use of it.
+      if (!DT->dominates(Shuffle, Extract))
+        continue;
+
+      // Inspect the indices of the shufflevector instruction. If the shuffle
+      // selects the same index that is extracted, we can modify the
+      // extractelement instruction.
+      SmallVector<int, 4> Indices;
+      Shuffle->getShuffleMask(Indices);
+      for (unsigned I = 0; I < Indices.size(); ++I)
+        if (Indices[I] == Index) {
+          assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
+                 "Vector operations do not match");
+          ReplacementMap[Extract] = std::make_pair(Shuffle, I);
+          break;
+        }
+
+      // If we found a suitable shufflevector instruction, stop looking.
+      if (ReplacementMap.count(Extract))
+        break;
+    }
+
+    // If we did not find a suitable shufflevector instruction, the
+    // extractelement instruction cannot be modified, so we must give up.
+    if (!ReplacementMap.count(Extract))
+      return false;
+  }
+
+  // Finally, perform the replacements.
+ IRBuilder<> Builder(Extracts[0]->getContext()); + for (auto &Replacement : ReplacementMap) { + auto *Extract = Replacement.first; + auto *Vector = Replacement.second.first; + auto Index = Replacement.second.second; + Builder.SetInsertPoint(Extract); + Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index)); + Extract->eraseFromParent(); + } + + return true; +} + bool InterleavedAccess::lowerInterleavedStore( StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) { if (!SI->isSimple()) @@ -264,6 +363,7 @@ bool InterleavedAccess::runOnFunction(Function &F) { DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n"); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TLI = TM->getSubtargetImpl(F)->getTargetLowering(); MaxFactor = TLI->getMaxSupportedInterleaveFactor(); diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 1c27377..9eb43d2 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" @@ -42,6 +43,10 @@ static cl::opt<cl::boolOrDefault> EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); +static cl::opt<bool> + EnableGlobalISel("global-isel", cl::Hidden, cl::init(false), + cl::desc("Enable the \"global\" instruction selector")); + void LLVMTargetMachine::initAsmInfo() { MRI = TheTarget.createMCRegInfo(getTargetTriple().str()); MII = TheTarget.createMCInstrInfo(); @@ -65,8 +70,15 @@ void LLVMTargetMachine::initAsmInfo() { if (Options.DisableIntegratedAS) TmpAsmInfo->setUseIntegratedAssembler(false); + TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments); + if (Options.CompressDebugSections) - TmpAsmInfo->setCompressDebugSections(true); + TmpAsmInfo->setCompressDebugSections(DebugCompressionType::DCT_ZlibGnu); + + TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations); + + if (Options.ExceptionModel != ExceptionHandling::None) + TmpAsmInfo->setExceptionsType(Options.ExceptionModel); AsmInfo = TmpAsmInfo; } @@ -78,7 +90,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) { - CodeGenInfo = T.createMCCodeGenInfo(TT.str(), RM, CM, OL); + T.adjustCodeGenOpts(TT, RM, CM); + this->RM = RM; + this->CMModel = CM; + this->OptLevel = OL; } TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { @@ -87,6 +102,20 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { }); } +MachineModuleInfo & +LLVMTargetMachine::addMachineModuleInfo(PassManagerBase &PM) const { + MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), + *getMCRegisterInfo(), + getObjFileLowering()); + PM.add(MMI); + return *MMI; +} + +void LLVMTargetMachine::addMachineFunctionAnalysis(PassManagerBase &PM, + MachineFunctionInitializer *MFInitializer) const { + PM.add(new MachineFunctionAnalysis(*this, MFInitializer)); +} + /// addPassesToX helper drives creation and initialization of TargetPassConfig. 
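The -global-isel flag declared above gates an alternative selector pipeline that addPassesToGenerateCode (below) assembles: IRTranslator, an optional pre-regbank-select hook, then register bank selection, instead of the SelectionDAG instruction selector. A minimal sketch of that dispatch with a hypothetical stub in place of TargetPassConfig; as in the patch, the add* hooks return true on error:

struct PassConfigStub {
  bool addIRTranslator() { return false; }   // false == success
  void addPreRegBankSelect() {}              // optional target hook
  bool addRegBankSelect() { return false; }
  bool addInstSelector() { return false; }
};

// Returns false when any stage of the chosen selector fails to set up.
bool addISelPasses(PassConfigStub &PC, bool UseGlobalISel) {
  if (UseGlobalISel) {
    if (PC.addIRTranslator())
      return false;
    PC.addPreRegBankSelect();
    return !PC.addRegBankSelect();
  }
  return !PC.addInstSelector();  // classic SelectionDAG path
}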
static MCContext * addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, @@ -94,6 +123,12 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, AnalysisID StartAfter, AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer = nullptr) { + // When in emulated TLS mode, add the LowerEmuTLS pass. + if (TM->Options.EmulatedTLS) + PM.add(createLowerEmuTLSPass(TM)); + + PM.add(createPreISelIntrinsicLoweringPass()); + // Add internal analysis passes from the target machine. PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); @@ -115,14 +150,8 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, PassConfig->addISelPrepare(); - // Install a MachineModuleInfo class, which is an immutable pass that holds - // all the per-module stuff we're generating, including MCContext. - MachineModuleInfo *MMI = new MachineModuleInfo( - *TM->getMCAsmInfo(), *TM->getMCRegisterInfo(), TM->getObjFileLowering()); - PM.add(MMI); - - // Set up a MachineFunction for the rest of CodeGen to work on. - PM.add(new MachineFunctionAnalysis(*TM, MFInitializer)); + MachineModuleInfo &MMI = TM->addMachineModuleInfo(PM); + TM->addMachineFunctionAnalysis(PM, MFInitializer); // Enable FastISel with -fast, but allow that to be overridden. TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); @@ -132,14 +161,25 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, TM->setFastISel(true); // Ask the target for an isel. - if (PassConfig->addInstSelector()) + if (LLVM_UNLIKELY(EnableGlobalISel)) { + if (PassConfig->addIRTranslator()) + return nullptr; + + // Before running the register bank selector, ask the target if it + // wants to run some passes. + PassConfig->addPreRegBankSelect(); + + if (PassConfig->addRegBankSelect()) + return nullptr; + + } else if (PassConfig->addInstSelector()) return nullptr; PassConfig->addMachinePasses(); PassConfig->setInitialized(); - return &MMI->getContext(); + return &MMI.getContext(); } bool LLVMTargetMachine::addPassesToEmitFile( @@ -154,7 +194,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( return true; if (StopAfter) { - PM.add(createPrintMIRPass(outs())); + PM.add(createPrintMIRPass(Out)); return false; } diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index be61a20..b810176 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -113,8 +113,7 @@ LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) { // The scope that we were created with could have an extra file - which // isn't what we care about in this case. - if (auto *File = dyn_cast<DILexicalBlockFile>(Scope)) - Scope = File->getScope(); + Scope = Scope->getNonLexicalBlockFileScope(); if (auto *IA = DL->getInlinedAt()) { auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); @@ -140,8 +139,8 @@ LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope, /// getOrCreateRegularScope - Find or create a regular lexical scope. 
LexicalScope *
LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) {
-  if (auto *File = dyn_cast<DILexicalBlockFile>(Scope))
-    Scope = File->getScope();
+  assert(Scope && "Invalid Scope encoding!");
+  Scope = Scope->getNonLexicalBlockFileScope();
 
   auto I = LexicalScopeMap.find(Scope);
   if (I != LexicalScopeMap.end())
@@ -169,6 +168,8 @@ LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) {
 LexicalScope *
 LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope,
                                        const DILocation *InlinedAt) {
+  assert(Scope && "Invalid Scope encoding!");
+  Scope = Scope->getNonLexicalBlockFileScope();
   std::pair<const DILocalScope *, const DILocation *> P(Scope, InlinedAt);
   auto I = InlinedLexicalScopeMap.find(P);
   if (I != InlinedLexicalScopeMap.end())
@@ -192,9 +193,7 @@ LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope,
 LexicalScope *
 LexicalScopes::getOrCreateAbstractScope(const DILocalScope *Scope) {
   assert(Scope && "Invalid Scope encoding!");
-
-  if (auto *File = dyn_cast<DILexicalBlockFile>(Scope))
-    Scope = File->getScope();
+  Scope = Scope->getNonLexicalBlockFileScope();
   auto I = AbstractScopeMap.find(Scope);
   if (I != AbstractScopeMap.end())
     return &I->second;
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
index b9937e5..4ff88d5 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -18,22 +18,24 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/UniqueVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
-#include <queue>
 #include <list>
+#include <queue>
 
 using namespace llvm;
 
@@ -43,48 +45,163 @@ STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
 
 namespace {
 
+// \brief If @MI is a DBG_VALUE with debug value described by a defined
+// register, returns the number of this register. Otherwise, returns 0.
+static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
+  assert(MI.isDebugValue() && "expected a DBG_VALUE");
+  assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
+  // If the location of the variable is described using a register (directly
+  // or indirectly), this register is always the first operand.
+  return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+}
+
 class LiveDebugValues : public MachineFunctionPass {
 
 private:
   const TargetRegisterInfo *TRI;
   const TargetInstrInfo *TII;
 
+  /// Based on std::pair so it can be used as an index into a DenseMap.
   typedef std::pair<const DILocalVariable *, const DILocation *>
-      InlinedVariable;
-
+      DebugVariableBase;
   /// A potentially inlined instance of a variable.
- struct DebugVariable { - const DILocalVariable *Var; - const DILocation *InlinedAt; + struct DebugVariable : public DebugVariableBase { + DebugVariable(const DILocalVariable *Var, const DILocation *InlinedAt) + : DebugVariableBase(Var, InlinedAt) {} - DebugVariable(const DILocalVariable *_var, const DILocation *_inlinedAt) - : Var(_var), InlinedAt(_inlinedAt) {} + const DILocalVariable *getVar() const { return this->first; }; + const DILocation *getInlinedAt() const { return this->second; }; - bool operator==(const DebugVariable &DV) const { - return (Var == DV.Var) && (InlinedAt == DV.InlinedAt); + bool operator<(const DebugVariable &DV) const { + if (getVar() == DV.getVar()) + return getInlinedAt() < DV.getInlinedAt(); + return getVar() < DV.getVar(); } }; - /// Member variables and functions for Range Extension across basic blocks. + /// A pair of debug variable and value location. struct VarLoc { - DebugVariable Var; - const MachineInstr *MI; // MachineInstr should be a DBG_VALUE instr. + const DebugVariable Var; + const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE. + + enum { InvalidKind = 0, RegisterKind } Kind; + + /// The value location. Stored separately to avoid repeatedly + /// extracting it from MI. + union { + struct { + uint32_t RegNo; + uint32_t Offset; + } RegisterLoc; + uint64_t Hash; + } Loc; + + VarLoc(const MachineInstr &MI) + : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI), + Kind(InvalidKind) { + static_assert((sizeof(Loc) == sizeof(uint64_t)), + "hash does not cover all members of Loc"); + assert(MI.isDebugValue() && "not a DBG_VALUE"); + assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); + if (int RegNo = isDbgValueDescribedByReg(MI)) { + Kind = RegisterKind; + Loc.RegisterLoc.RegNo = RegNo; + uint64_t Offset = + MI.isIndirectDebugValue() ? MI.getOperand(1).getImm() : 0; + // We don't support offsets larger than 4GiB here. They are + // slated to be replaced with DIExpressions anyway. + if (Offset >= (1ULL << 32)) + Kind = InvalidKind; + else + Loc.RegisterLoc.Offset = Offset; + } + } + + /// If this variable is described by a register, return it, + /// otherwise return 0. + unsigned isDescribedByReg() const { + if (Kind == RegisterKind) + return Loc.RegisterLoc.RegNo; + return 0; + } - VarLoc(DebugVariable _var, const MachineInstr *_mi) : Var(_var), MI(_mi) {} + void dump() const { MI.dump(); } - bool operator==(const VarLoc &V) const; + bool operator==(const VarLoc &Other) const { + return Var == Other.Var && Loc.Hash == Other.Loc.Hash; + } + + /// This operator guarantees that VarLocs are sorted by Variable first. + bool operator<(const VarLoc &Other) const { + if (Var == Other.Var) + return Loc.Hash < Other.Loc.Hash; + return Var < Other.Var; + } }; - typedef std::list<VarLoc> VarLocList; - typedef SmallDenseMap<const MachineBasicBlock *, VarLocList> VarLocInMBB; + typedef UniqueVector<VarLoc> VarLocMap; + typedef SparseBitVector<> VarLocSet; + typedef SmallDenseMap<const MachineBasicBlock *, VarLocSet> VarLocInMBB; + + /// This holds the working set of currently open ranges. For fast + /// access, this is done both as a set of VarLocIDs, and a map of + /// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all + /// previous open ranges for the same variable. + class OpenRangesSet { + VarLocSet VarLocs; + SmallDenseMap<DebugVariableBase, unsigned, 8> Vars; + + public: + const VarLocSet &getVarLocs() const { return VarLocs; } + + /// Terminate all open ranges for Var by removing it from the set. 
+ void erase(DebugVariable Var) { + auto It = Vars.find(Var); + if (It != Vars.end()) { + unsigned ID = It->second; + VarLocs.reset(ID); + Vars.erase(It); + } + } + + /// Terminate all open ranges listed in \c KillSet by removing + /// them from the set. + void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs) { + VarLocs.intersectWithComplement(KillSet); + for (unsigned ID : KillSet) + Vars.erase(VarLocIDs[ID].Var); + } + + /// Insert a new range into the set. + void insert(unsigned VarLocID, DebugVariableBase Var) { + VarLocs.set(VarLocID); + Vars.insert({Var, VarLocID}); + } + + /// Empty the set. + void clear() { + VarLocs.clear(); + Vars.clear(); + } + + /// Return whether the set is empty or not. + bool empty() const { + assert(Vars.empty() == VarLocs.empty() && "open ranges are inconsistent"); + return VarLocs.empty(); + } + }; - void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges); - void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges); - bool transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges, - VarLocInMBB &OutLocs); - bool transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs); + void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs); + void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, + const VarLocMap &VarLocIDs); + bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); + bool transfer(MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, VarLocMap &VarLocIDs); - bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs); + bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs, + const VarLocMap &VarLocIDs); bool ExtendRanges(MachineFunction &MF); @@ -98,8 +215,14 @@ public: /// information we preserve. void getAnalysisUsage(AnalysisUsage &AU) const override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + /// Print to ostream with a message. - void printVarLocInMBB(const VarLocInMBB &V, const char *msg, + void printVarLocInMBB(const MachineFunction &MF, const VarLocInMBB &V, + const VarLocMap &VarLocIDs, const char *msg, raw_ostream &Out) const; /// Calculate the liveness information for the given machine function. @@ -124,109 +247,95 @@ LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) { /// Tell the pass manager which passes we depend on and what information we /// preserve. void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } -// \brief If @MI is a DBG_VALUE with debug value described by a defined -// register, returns the number of this register. In the other case, returns 0. -static unsigned isDescribedByReg(const MachineInstr &MI) { - assert(MI.isDebugValue()); - assert(MI.getNumOperands() == 4); - // If location of variable is described using a register (directly or - // indirecltly), this register is always a first operand. - return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; -} - -// \brief This function takes two DBG_VALUE instructions and returns true -// if their offsets are equal; otherwise returns false. 
-static bool areOffsetsEqual(const MachineInstr &MI1, const MachineInstr &MI2) { - assert(MI1.isDebugValue()); - assert(MI1.getNumOperands() == 4); - - assert(MI2.isDebugValue()); - assert(MI2.getNumOperands() == 4); - - if (!MI1.isIndirectDebugValue() && !MI2.isIndirectDebugValue()) - return true; - - // Check if both MIs are indirect and they are equal. - if (MI1.isIndirectDebugValue() && MI2.isIndirectDebugValue()) - return MI1.getOperand(1).getImm() == MI2.getOperand(1).getImm(); - - return false; -} - //===----------------------------------------------------------------------===// // Debug Range Extension Implementation //===----------------------------------------------------------------------===// -void LiveDebugValues::printVarLocInMBB(const VarLocInMBB &V, const char *msg, +void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, + const VarLocInMBB &V, + const VarLocMap &VarLocIDs, + const char *msg, raw_ostream &Out) const { - Out << "Printing " << msg << ":\n"; - for (const auto &L : V) { - Out << "MBB: " << L.first->getName() << ":\n"; - for (const auto &VLL : L.second) { - Out << " Var: " << VLL.Var.Var->getName(); + for (const MachineBasicBlock &BB : MF) { + const auto &L = V.lookup(&BB); + Out << "MBB: " << BB.getName() << ":\n"; + for (unsigned VLL : L) { + const VarLoc &VL = VarLocIDs[VLL]; + Out << " Var: " << VL.Var.getVar()->getName(); Out << " MI: "; - (*VLL.MI).dump(); + VL.dump(); Out << "\n"; } } Out << "\n"; } -bool LiveDebugValues::VarLoc::operator==(const VarLoc &V) const { - return (Var == V.Var) && (isDescribedByReg(*MI) == isDescribedByReg(*V.MI)) && - (areOffsetsEqual(*MI, *V.MI)); -} - /// End all previous ranges related to @MI and start a new range from @MI /// if it is a DBG_VALUE instr. -void LiveDebugValues::transferDebugValue(MachineInstr &MI, - VarLocList &OpenRanges) { +void LiveDebugValues::transferDebugValue(const MachineInstr &MI, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs) { if (!MI.isDebugValue()) return; - const DILocalVariable *RawVar = MI.getDebugVariable(); - assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + const DILocalVariable *Var = MI.getDebugVariable(); + const DILocation *DebugLoc = MI.getDebugLoc(); + const DILocation *InlinedAt = DebugLoc->getInlinedAt(); + assert(Var->isValidLocationForIntrinsic(DebugLoc) && "Expected inlined-at fields to agree"); - DebugVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); // End all previous ranges of Var. - OpenRanges.erase( - std::remove_if(OpenRanges.begin(), OpenRanges.end(), - [&](const VarLoc &V) { return (Var == V.Var); }), - OpenRanges.end()); + DebugVariable V(Var, InlinedAt); + OpenRanges.erase(V); - // Add Var to OpenRanges from this DBG_VALUE. + // Add the VarLoc to OpenRanges from this DBG_VALUE. // TODO: Currently handles DBG_VALUE which has only reg as location. - if (isDescribedByReg(MI)) { - VarLoc V(Var, &MI); - OpenRanges.push_back(std::move(V)); + if (isDbgValueDescribedByReg(MI)) { + VarLoc VL(MI); + unsigned ID = VarLocIDs.insert(VL); + OpenRanges.insert(ID, VL.Var); } } /// A definition of a register may mark the end of a range. 
void LiveDebugValues::transferRegisterDef(MachineInstr &MI, - VarLocList &OpenRanges) { + OpenRangesSet &OpenRanges, + const VarLocMap &VarLocIDs) { + MachineFunction *MF = MI.getParent()->getParent(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + SparseBitVector<> KillSet; for (const MachineOperand &MO : MI.operands()) { - if (!(MO.isReg() && MO.isDef() && MO.getReg() && - TRI->isPhysicalRegister(MO.getReg()))) - continue; - // Remove ranges of all aliased registers. - for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) - OpenRanges.erase(std::remove_if(OpenRanges.begin(), OpenRanges.end(), - [&](const VarLoc &V) { - return (*RAI == - isDescribedByReg(*V.MI)); - }), - OpenRanges.end()); + if (MO.isReg() && MO.isDef() && MO.getReg() && + TRI->isPhysicalRegister(MO.getReg())) { + // Remove ranges of all aliased registers. + for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) + for (unsigned ID : OpenRanges.getVarLocs()) + if (VarLocIDs[ID].isDescribedByReg() == *RAI) + KillSet.set(ID); + } else if (MO.isRegMask()) { + // Remove ranges of all clobbered registers. Register masks don't usually + // list SP as preserved. While the debug info may be off for an + // instruction or two around callee-cleanup calls, transferring the + // DEBUG_VALUE across the call is still a better user experience. + for (unsigned ID : OpenRanges.getVarLocs()) { + unsigned Reg = VarLocIDs[ID].isDescribedByReg(); + if (Reg && Reg != SP && MO.clobbersPhysReg(Reg)) + KillSet.set(ID); + } + } } + OpenRanges.erase(KillSet, VarLocIDs); } /// Terminate all open ranges at the end of the current basic block. bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, - VarLocList &OpenRanges, - VarLocInMBB &OutLocs) { + OpenRangesSet &OpenRanges, + VarLocInMBB &OutLocs, + const VarLocMap &VarLocIDs) { bool Changed = false; const MachineBasicBlock *CurMBB = MI.getParent(); if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back()))) @@ -235,29 +344,23 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI, if (OpenRanges.empty()) return false; - VarLocList &VLL = OutLocs[CurMBB]; - - for (auto OR : OpenRanges) { - // Copy OpenRanges to OutLocs, if not already present. - assert(OR.MI->isDebugValue()); - DEBUG(dbgs() << "Add to OutLocs: "; OR.MI->dump();); - if (std::find_if(VLL.begin(), VLL.end(), - [&](const VarLoc &V) { return (OR == V); }) == VLL.end()) { - VLL.push_back(std::move(OR)); - Changed = true; - } - } + DEBUG(for (unsigned ID : OpenRanges.getVarLocs()) { + // Copy OpenRanges to OutLocs, if not already present. + dbgs() << "Add to OutLocs: "; VarLocIDs[ID].dump(); + }); + VarLocSet &VLS = OutLocs[CurMBB]; + Changed = VLS |= OpenRanges.getVarLocs(); OpenRanges.clear(); return Changed; } /// This routine creates OpenRanges and OutLocs. 
-bool LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges,
-                               VarLocInMBB &OutLocs) {
+bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
+                               VarLocInMBB &OutLocs, VarLocMap &VarLocIDs) {
   bool Changed = false;
-  transferDebugValue(MI, OpenRanges);
-  transferRegisterDef(MI, OpenRanges);
-  Changed = transferTerminatorInst(MI, OpenRanges, OutLocs);
+  transferDebugValue(MI, OpenRanges, VarLocIDs);
+  transferRegisterDef(MI, OpenRanges, VarLocIDs);
+  Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
   return Changed;
 }
 
@@ -265,14 +368,14 @@ bool LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges,
 /// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same
 /// source variable in all the predecessors of @MBB reside in the same location.
 bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
-                           VarLocInMBB &InLocs) {
+                           VarLocInMBB &InLocs, const VarLocMap &VarLocIDs) {
   DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
   bool Changed = false;
 
-  VarLocList InLocsT; // Temporary incoming locations.
+  VarLocSet InLocsT; // Temporary incoming locations.
 
-  // For all predecessors of this MBB, find the set of VarLocs that can be
-  // joined.
+  // For all predecessors of this MBB, find the set of VarLocs that
+  // can be joined.
   for (auto p : MBB.predecessors()) {
     auto OL = OutLocs.find(p);
     // Join is null in case of empty OutLocs from any of the pred.
@@ -284,44 +387,34 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
       InLocsT = OL->second;
       continue;
     }
-    // Join with this predecessor.
-    VarLocList &VLL = OL->second;
-    InLocsT.erase(
-        std::remove_if(InLocsT.begin(), InLocsT.end(), [&](VarLoc &ILT) {
-          return (std::find_if(VLL.begin(), VLL.end(), [&](const VarLoc &V) {
-                    return (ILT == V);
-                  }) == VLL.end());
-        }), InLocsT.end());
+    InLocsT &= OL->second;
   }
 
   if (InLocsT.empty())
     return false;
 
-  VarLocList &ILL = InLocs[&MBB];
+  VarLocSet &ILS = InLocs[&MBB];
 
   // Insert DBG_VALUE instructions, if not already inserted.
-  for (auto ILT : InLocsT) {
-    if (std::find_if(ILL.begin(), ILL.end(), [&](const VarLoc &I) {
-          return (ILT == I);
-        }) == ILL.end()) {
-      // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a
-      // new range is started for the var from the mbb's beginning by inserting
-      // a new DBG_VALUE. transfer() will end this range however appropriate.
-      const MachineInstr *DMI = ILT.MI;
-      MachineInstr *MI =
-          BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
-                  DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0,
-                  DMI->getDebugVariable(), DMI->getDebugExpression());
-      if (DMI->isIndirectDebugValue())
-        MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
-      DEBUG(dbgs() << "Inserted: "; MI->dump(););
-      ++NumInserted;
-      Changed = true;
-
-      VarLoc V(ILT.Var, MI);
-      ILL.push_back(std::move(V));
-    }
+  VarLocSet Diff = InLocsT;
+  Diff.intersectWithComplement(ILS);
+  for (auto ID : Diff) {
+    // This VarLoc is not found in InLocs, i.e., it is not yet inserted. So, a
+    // new range is started for the var from the mbb's beginning by inserting
+    // a new DBG_VALUE. transfer() will end this range as appropriate.
+ const VarLoc &DiffIt = VarLocIDs[ID]; + const MachineInstr *DMI = &DiffIt.MI; + MachineInstr *MI = + BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(), + DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0, + DMI->getDebugVariable(), DMI->getDebugExpression()); + if (DMI->isIndirectDebugValue()) + MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); + DEBUG(dbgs() << "Inserted: "; MI->dump();); + ILS.set(ID); + ++NumInserted; + Changed = true; } return Changed; } @@ -336,21 +429,27 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { bool OLChanged = false; bool MBBJoined = false; - VarLocList OpenRanges; // Ranges that are open until end of bb. + VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors. + OpenRangesSet OpenRanges; // Ranges that are open until end of bb. VarLocInMBB OutLocs; // Ranges that exist beyond bb. VarLocInMBB InLocs; // Ranges that are incoming after joining. DenseMap<unsigned int, MachineBasicBlock *> OrderToBB; DenseMap<MachineBasicBlock *, unsigned int> BBToOrder; std::priority_queue<unsigned int, std::vector<unsigned int>, - std::greater<unsigned int>> Worklist; + std::greater<unsigned int>> + Worklist; std::priority_queue<unsigned int, std::vector<unsigned int>, - std::greater<unsigned int>> Pending; + std::greater<unsigned int>> + Pending; + // Initialize every mbb with OutLocs. for (auto &MBB : MF) for (auto &MI : MBB) - transfer(MI, OpenRanges, OutLocs); - DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs())); + transfer(MI, OpenRanges, OutLocs, VarLocIDs); + + DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after initialization", + dbgs())); ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); unsigned int RPONumber = 0; @@ -360,7 +459,6 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { Worklist.push(RPONumber); ++RPONumber; } - // This is a standard "union of predecessor outs" dataflow problem. // To solve it, we perform join() and transfer() using the two worklist method // until the ranges converge. 
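[Editor's note] The comment above is the core of the rewritten pass: locations are propagated with join() (now a set intersection over predecessor OutLocs, since a location is only trustworthy at block entry if every predecessor agrees) and transfer(), draining one worklist while queueing re-visits on a second. The following standalone C++ sketch illustrates just that two-worklist scheme; the toy CFG, the Gen sets, and std::set are invented stand-ins for MachineFunction, the blocks' DBG_VALUE effects, and the pass's SparseBitVector-based VarLocSet, not LLVM code.

#include <algorithm>
#include <cstdio>
#include <functional>
#include <iterator>
#include <queue>
#include <set>
#include <vector>

// Stand-in for the pass's VarLocSet (a SparseBitVector of VarLoc IDs).
using LocSet = std::set<int>;

static LocSet intersect(const LocSet &A, const LocSet &B) {
  LocSet R;
  std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                        std::inserter(R, R.end()));
  return R;
}

int main() {
  // Toy CFG in reverse post-order: 0 -> 1 -> 2 -> 3, with a back edge 3 -> 1.
  std::vector<std::vector<int>> Preds = {{}, {0, 3}, {1}, {2}};
  std::vector<std::vector<int>> Succs = {{1}, {2}, {3}, {1}};
  // Locations each block makes available itself (its own DBG_VALUEs).
  std::vector<LocSet> Gen = {{1, 2}, {}, {3}, {}};
  std::vector<LocSet> In(4), Out(4);

  std::priority_queue<int, std::vector<int>, std::greater<int>> Worklist,
      Pending;
  std::set<int> OnPending; // avoid queueing a block twice per round
  for (int B = 0; B < 4; ++B)
    Worklist.push(B);

  // Two-worklist iteration: drain Worklist in RPO, queue blocks whose inputs
  // may have changed on Pending, then swap; stop when a whole round is quiet.
  while (!Worklist.empty()) {
    while (!Worklist.empty()) {
      int B = Worklist.top();
      Worklist.pop();
      // join(): a location is live-in only if every predecessor provides it.
      LocSet NewIn;
      for (size_t I = 0; I < Preds[B].size(); ++I)
        NewIn = (I == 0) ? Out[Preds[B][0]] : intersect(NewIn, Out[Preds[B][I]]);
      In[B] = NewIn;
      // transfer(): live-in locations plus the block's own flow out.
      LocSet NewOut = NewIn;
      NewOut.insert(Gen[B].begin(), Gen[B].end());
      if (NewOut != Out[B]) {
        Out[B] = NewOut;
        for (int S : Succs[B])
          if (OnPending.insert(S).second)
            Pending.push(S);
      }
    }
    std::swap(Worklist, Pending);
    OnPending.clear();
  }

  for (int B = 0; B < 4; ++B)
    std::printf("block %d has %zu join-able live-in location(s)\n", B,
                In[B].size());
  return 0;
}

On this toy graph the loop head (block 1) ends with no join-able locations, because the back edge's Out set does not agree with the entry block's; that is exactly the conservatism the intersection join buys.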
@@ -373,21 +471,23 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { while (!Worklist.empty()) { MachineBasicBlock *MBB = OrderToBB[Worklist.top()]; Worklist.pop(); - MBBJoined = join(*MBB, OutLocs, InLocs); + MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs); if (MBBJoined) { MBBJoined = false; Changed = true; for (auto &MI : *MBB) - OLChanged |= transfer(MI, OpenRanges, OutLocs); - DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs())); - DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs())); + OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs); + + DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, + "OutLocs after propagating", dbgs())); + DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, + "InLocs after propagating", dbgs())); if (OLChanged) { OLChanged = false; for (auto s : MBB->successors()) - if (!OnPending.count(s)) { - OnPending.insert(s); + if (OnPending.insert(s).second) { Pending.push(BBToOrder[s]); } } @@ -399,8 +499,8 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { assert(Pending.empty() && "Pending should be empty"); } - DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs())); - DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs())); + DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs())); + DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs())); return Changed; } diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 6dac7db..966b4f1 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -42,6 +42,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <memory> +#include <utility> using namespace llvm; @@ -84,7 +85,7 @@ class UserValueScopes { SmallPtrSet<const MachineBasicBlock *, 4> LBlocks; public: - UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(D), LS(L) {} + UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {} /// dominates - Return true if current scope dominates at least one machine /// instruction in a given machine basic block. @@ -141,8 +142,8 @@ public: /// UserValue - Create a new UserValue. UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i, DebugLoc L, LocMap::Allocator &alloc) - : Variable(var), Expression(expr), offset(o), IsIndirect(i), dl(L), - leader(this), next(nullptr), locInts(alloc) {} + : Variable(var), Expression(expr), offset(o), IsIndirect(i), + dl(std::move(L)), leader(this), next(nullptr), locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. UserValue *getLeader() { @@ -172,8 +173,10 @@ public: return L1; // Splice L2 before L1's members. UserValue *End = L2; - while (End->next) - End->leader = L1, End = End->next; + while (End->next) { + End->leader = L1; + End = End->next; + } End->leader = L1; End->next = L1->next; L1->next = L2; @@ -302,7 +305,7 @@ class LDVImpl { /// getUserValue - Find or create a UserValue. UserValue *getUserValue(const MDNode *Var, const MDNode *Expr, - unsigned Offset, bool IsIndirect, DebugLoc DL); + unsigned Offset, bool IsIndirect, const DebugLoc &DL); /// lookupVirtReg - Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); @@ -311,7 +314,7 @@ class LDVImpl { /// @param MI DBG_VALUE instruction /// @param Idx Last valid SLotIndex before instruction. /// @return True if the DBG_VALUE instruction should be deleted. 
-  bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+  bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
 
   /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
   /// a UserValue def for each instruction.
@@ -355,7 +358,7 @@ public:
 };
 } // namespace
 
-static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
+static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS,
                           const LLVMContext &Ctx) {
   if (!DL)
     return;
@@ -456,7 +459,7 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) {
 
 UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr,
                                  unsigned Offset, bool IsIndirect,
-                                 DebugLoc DL) {
+                                 const DebugLoc &DL) {
   UserValue *&Leader = userVarMap[Var];
   if (Leader) {
     UserValue *UV = Leader->getLeader();
@@ -485,24 +488,23 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
   return nullptr;
 }
 
-bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
   // DBG_VALUE loc, offset, variable
-  if (MI->getNumOperands() != 4 ||
-      !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) ||
-      !MI->getOperand(2).isMetadata()) {
-    DEBUG(dbgs() << "Can't handle " << *MI);
+  if (MI.getNumOperands() != 4 ||
+      !(MI.getOperand(1).isReg() || MI.getOperand(1).isImm()) ||
+      !MI.getOperand(2).isMetadata()) {
+    DEBUG(dbgs() << "Can't handle " << MI);
     return false;
   }
 
   // Get or create the UserValue for (variable,offset).
-  bool IsIndirect = MI->isIndirectDebugValue();
-  unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
-  const MDNode *Var = MI->getDebugVariable();
-  const MDNode *Expr = MI->getDebugExpression();
+  bool IsIndirect = MI.isIndirectDebugValue();
+  unsigned Offset = IsIndirect ? MI.getOperand(1).getImm() : 0;
+  const MDNode *Var = MI.getDebugVariable();
+  const MDNode *Expr = MI.getDebugExpression();
-  UserValue *UV =
-      getUserValue(Var, Expr, Offset, IsIndirect, MI->getDebugLoc());
-  UV->addDef(Idx, MI->getOperand(0));
+  UserValue *UV = getUserValue(Var, Expr, Offset, IsIndirect, MI.getDebugLoc());
+  UV->addDef(Idx, MI.getOperand(0));
   return true;
 }
 
@@ -518,12 +520,13 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
       continue;
     }
     // DBG_VALUE has no slot index, use the previous instruction instead.
-    SlotIndex Idx = MBBI == MBB->begin() ?
-      LIS->getMBBStartIdx(MBB) :
-      LIS->getInstructionIndex(std::prev(MBBI)).getRegSlot();
+    SlotIndex Idx =
+        MBBI == MBB->begin()
+            ? LIS->getMBBStartIdx(MBB)
+            : LIS->getInstructionIndex(*std::prev(MBBI)).getRegSlot();
     // Handle consecutive DBG_VALUE instructions with the same slot index.
     do {
-      if (handleDebugValue(MBBI, Idx)) {
+      if (handleDebugValue(*MBBI, Idx)) {
         MBBI = MBB->erase(MBBI);
         Changed = true;
       } else
@@ -554,8 +557,10 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
         Kills->push_back(Start);
       return;
     }
-    if (Segment->end < Stop)
-      Stop = Segment->end, ToEnd = false;
+    if (Segment->end < Stop) {
+      Stop = Segment->end;
+      ToEnd = false;
+    }
   }
 
   // There could already be a short def at Start.
@@ -569,8 +574,10 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
   }
 
   // Limited by the next def.
-  if (I.valid() && I.start() < Stop)
-    Stop = I.start(), ToEnd = false;
+  if (I.valid() && I.start() < Stop) {
+    Stop = I.start();
+    ToEnd = false;
+  }
   // Limited by VNI's live range.
   else if (!ToEnd && Kills)
     Kills->push_back(Stop);
@@ -608,7 +615,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
     // Is LocNo extended to reach this copy?
If not, another def may be blocking // it, or we are looking at a wrong value of LI. - SlotIndex Idx = LIS.getInstructionIndex(MI); + SlotIndex Idx = LIS.getInstructionIndex(*MI); LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); if (!I.valid() || I.value() != LocNo) continue; @@ -1033,7 +1040,7 @@ bool LiveDebugVariables::doInitialization(Module &M) { } #ifndef NDEBUG -void LiveDebugVariables::dump() { +LLVM_DUMP_METHOD void LiveDebugVariables::dump() { if (pImpl) static_cast<LDVImpl*>(pImpl)->print(dbgs()); } diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h index 3d36f4d..afe87a5 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h @@ -21,12 +21,12 @@ #ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H #define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/IR/DebugInfo.h" namespace llvm { +template <typename T> class ArrayRef; class LiveInterval; class LiveIntervals; class VirtRegMap; diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index 5015800..93c5ca7 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -19,8 +19,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveInterval.h" + +#include "LiveRangeUtils.h" #include "RegisterCoalescer.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -309,10 +310,12 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) { size_t Len = size(); do { size_t Mid = Len >> 1; - if (Pos < I[Mid].end) + if (Pos < I[Mid].end) { Len = Mid; - else - I += Mid + 1, Len -= Mid + 1; + } else { + I += Mid + 1; + Len -= Mid + 1; + } } while (Len); return I; } @@ -814,239 +817,6 @@ void LiveInterval::clearSubRanges() { SubRanges = nullptr; } -/// Helper function for constructMainRangeFromSubranges(): Search the CFG -/// backwards until we find a place covered by a LiveRange segment that actually -/// has a valno set. -static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR, - const MachineBasicBlock *MBB, - SmallPtrSetImpl<const MachineBasicBlock*> &Visited) { - // We start the search at the end of MBB. - SlotIndex EndIdx = Indexes.getMBBEndIdx(MBB); - // In our use case we can't live the area covered by the live segments without - // finding an actual VNI def. - LiveRange::iterator I = LR.find(EndIdx.getPrevSlot()); - assert(I != LR.end()); - LiveRange::Segment &S = *I; - if (S.valno != nullptr) - return S.valno; - - VNInfo *VNI = nullptr; - // Continue at predecessors (we could even go to idom with domtree available). - for (const MachineBasicBlock *Pred : MBB->predecessors()) { - // Avoid going in circles. - if (!Visited.insert(Pred).second) - continue; - - VNI = searchForVNI(Indexes, LR, Pred, Visited); - if (VNI != nullptr) { - S.valno = VNI; - break; - } - } - - return VNI; -} - -static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) { - SmallPtrSet<const MachineBasicBlock*, 5> Visited; - - LiveRange::iterator OutIt; - VNInfo *PrevValNo = nullptr; - for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) { - LiveRange::Segment &S = *I; - // Determine final VNI if necessary. - if (S.valno == nullptr) { - // This can only happen at the begin of a basic block. 
- assert(S.start.isBlock() && "valno should only be missing at block begin"); - - Visited.clear(); - const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start); - for (const MachineBasicBlock *Pred : MBB->predecessors()) { - VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited); - if (VNI != nullptr) { - S.valno = VNI; - break; - } - } - assert(S.valno != nullptr && "could not determine valno"); - } - // Merge with previous segment if it has the same VNI. - if (PrevValNo == S.valno && OutIt->end == S.start) { - OutIt->end = S.end; - } else { - // Didn't merge. Move OutIt to next segment. - if (PrevValNo == nullptr) - OutIt = LI.begin(); - else - ++OutIt; - - if (OutIt != I) - *OutIt = *I; - PrevValNo = S.valno; - } - } - // If we merged some segments chop off the end. - ++OutIt; - LI.segments.erase(OutIt, LI.end()); -} - -void LiveInterval::constructMainRangeFromSubranges( - const SlotIndexes &Indexes, VNInfo::Allocator &VNIAllocator) { - // The basic observations on which this algorithm is based: - // - Each Def/ValNo in a subrange must have a corresponding def on the main - // range, but not further defs/valnos are necessary. - // - If any of the subranges is live at a point the main liverange has to be - // live too, conversily if no subrange is live the main range mustn't be - // live either. - // We do this by scanning through all the subranges simultaneously creating new - // segments in the main range as segments start/ends come up in the subranges. - assert(hasSubRanges() && "expected subranges to be present"); - assert(segments.empty() && valnos.empty() && "expected empty main range"); - - // Collect subrange, iterator pairs for the walk and determine first and last - // SlotIndex involved. - SmallVector<std::pair<const SubRange*, const_iterator>, 4> SRs; - SlotIndex First; - SlotIndex Last; - for (const SubRange &SR : subranges()) { - if (SR.empty()) - continue; - SRs.push_back(std::make_pair(&SR, SR.begin())); - if (!First.isValid() || SR.segments.front().start < First) - First = SR.segments.front().start; - if (!Last.isValid() || SR.segments.back().end > Last) - Last = SR.segments.back().end; - } - - // Walk over all subranges simultaneously. - Segment CurrentSegment; - bool ConstructingSegment = false; - bool NeedVNIFixup = false; - LaneBitmask ActiveMask = 0; - SlotIndex Pos = First; - while (true) { - SlotIndex NextPos = Last; - enum { - NOTHING, - BEGIN_SEGMENT, - END_SEGMENT, - } Event = NOTHING; - // Which subregister lanes are affected by the current event. - LaneBitmask EventMask = 0; - // Whether a BEGIN_SEGMENT is also a valno definition point. - bool IsDef = false; - // Find the next begin or end of a subrange segment. Combine masks if we - // have multiple begins/ends at the same position. Ends take precedence over - // Begins. - for (auto &SRP : SRs) { - const SubRange &SR = *SRP.first; - const_iterator &I = SRP.second; - // Advance iterator of subrange to a segment involving Pos; the earlier - // segments are already merged at this point. - while (I != SR.end() && - (I->end < Pos || - (I->end == Pos && (ActiveMask & SR.LaneMask) == 0))) - ++I; - if (I == SR.end()) - continue; - if ((ActiveMask & SR.LaneMask) == 0 && - Pos <= I->start && I->start <= NextPos) { - // Merge multiple begins at the same position. 
- if (I->start == NextPos && Event == BEGIN_SEGMENT) { - EventMask |= SR.LaneMask; - IsDef |= I->valno->def == I->start; - } else if (I->start < NextPos || Event != END_SEGMENT) { - Event = BEGIN_SEGMENT; - NextPos = I->start; - EventMask = SR.LaneMask; - IsDef = I->valno->def == I->start; - } - } - if ((ActiveMask & SR.LaneMask) != 0 && - Pos <= I->end && I->end <= NextPos) { - // Merge multiple ends at the same position. - if (I->end == NextPos && Event == END_SEGMENT) - EventMask |= SR.LaneMask; - else { - Event = END_SEGMENT; - NextPos = I->end; - EventMask = SR.LaneMask; - } - } - } - - // Advance scan position. - Pos = NextPos; - if (Event == BEGIN_SEGMENT) { - if (ConstructingSegment && IsDef) { - // Finish previous segment because we have to start a new one. - CurrentSegment.end = Pos; - append(CurrentSegment); - ConstructingSegment = false; - } - - // Start a new segment if necessary. - if (!ConstructingSegment) { - // Determine value number for the segment. - VNInfo *VNI; - if (IsDef) { - VNI = getNextValue(Pos, VNIAllocator); - } else { - // We have to reuse an existing value number, if we are lucky - // then we already passed one of the predecessor blocks and determined - // its value number (with blocks in reverse postorder this would be - // always true but we have no such guarantee). - assert(Pos.isBlock()); - const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Pos); - // See if any of the predecessor blocks has a lower number and a VNI - for (const MachineBasicBlock *Pred : MBB->predecessors()) { - SlotIndex PredEnd = Indexes.getMBBEndIdx(Pred); - VNI = getVNInfoBefore(PredEnd); - if (VNI != nullptr) - break; - } - // Def will come later: We have to do an extra fixup pass. - if (VNI == nullptr) - NeedVNIFixup = true; - } - - // In rare cases we can produce adjacent segments with the same value - // number (if they come from different subranges, but happen to have - // the same defining instruction). VNIFixup will fix those cases. - if (!empty() && segments.back().end == Pos && - segments.back().valno == VNI) - NeedVNIFixup = true; - CurrentSegment.start = Pos; - CurrentSegment.valno = VNI; - ConstructingSegment = true; - } - ActiveMask |= EventMask; - } else if (Event == END_SEGMENT) { - assert(ConstructingSegment); - // Finish segment if no lane is active anymore. - ActiveMask &= ~EventMask; - if (ActiveMask == 0) { - CurrentSegment.end = Pos; - append(CurrentSegment); - ConstructingSegment = false; - } - } else { - // We reached the end of the last subranges and can stop. - assert(Event == NOTHING); - break; - } - } - - // We might not be able to assign new valnos for all segments if the basic - // block containing the definition comes after a segment using the valno. - // Do a fixup pass for this uncommon case. 
- if (NeedVNIFixup) - determineMissingVNIs(Indexes, *this); - - assert(ActiveMask == 0 && !ConstructingSegment && "all segments ended"); - verify(); -} - unsigned LiveInterval::getSize() const { unsigned Sum = 0; for (const Segment &S : segments) @@ -1055,12 +825,12 @@ unsigned LiveInterval::getSize() const { } raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) { - return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ")"; + return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')'; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void LiveRange::Segment::dump() const { - dbgs() << *this << "\n"; +LLVM_DUMP_METHOD void LiveRange::Segment::dump() const { + dbgs() << *this << '\n'; } #endif @@ -1081,10 +851,10 @@ void LiveRange::print(raw_ostream &OS) const { for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e; ++i, ++vnum) { const VNInfo *vni = *i; - if (vnum) OS << " "; - OS << vnum << "@"; + if (vnum) OS << ' '; + OS << vnum << '@'; if (vni->isUnused()) { - OS << "x"; + OS << 'x'; } else { OS << vni->def; if (vni->isPHIDef()) @@ -1094,22 +864,30 @@ void LiveRange::print(raw_ostream &OS) const { } } +void LiveInterval::SubRange::print(raw_ostream &OS) const { + OS << " L" << PrintLaneMask(LaneMask) << ' ' + << static_cast<const LiveRange&>(*this); +} + void LiveInterval::print(raw_ostream &OS) const { OS << PrintReg(reg) << ' '; super::print(OS); // Print subranges - for (const SubRange &SR : subranges()) { - OS << " L" << PrintLaneMask(SR.LaneMask) << ' ' << SR; - } + for (const SubRange &SR : subranges()) + OS << SR; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void LiveRange::dump() const { - dbgs() << *this << "\n"; +LLVM_DUMP_METHOD void LiveRange::dump() const { + dbgs() << *this << '\n'; +} + +LLVM_DUMP_METHOD void LiveInterval::SubRange::dump() const { + dbgs() << *this << '\n'; } -void LiveInterval::dump() const { - dbgs() << *this << "\n"; +LLVM_DUMP_METHOD void LiveInterval::dump() const { + dbgs() << *this << '\n'; } #endif @@ -1206,8 +984,7 @@ void LiveRangeUpdater::print(raw_ostream &OS) const { OS << '\n'; } -void LiveRangeUpdater::dump() const -{ +LLVM_DUMP_METHOD void LiveRangeUpdater::dump() const { print(errs()); } @@ -1405,40 +1182,6 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) { return EqClass.getNumClasses(); } -template<typename LiveRangeT, typename EqClassesT> -static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[], - EqClassesT VNIClasses) { - // Move segments to new intervals. - LiveRange::iterator J = LR.begin(), E = LR.end(); - while (J != E && VNIClasses[J->valno->id] == 0) - ++J; - for (LiveRange::iterator I = J; I != E; ++I) { - if (unsigned eq = VNIClasses[I->valno->id]) { - assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) && - "New intervals should be empty"); - SplitLRs[eq-1]->segments.push_back(*I); - } else - *J++ = *I; - } - LR.segments.erase(J, E); - - // Transfer VNInfos to their new owners and renumber them. - unsigned j = 0, e = LR.getNumValNums(); - while (j != e && VNIClasses[j] == 0) - ++j; - for (unsigned i = j; i != e; ++i) { - VNInfo *VNI = LR.getValNumInfo(i); - if (unsigned eq = VNIClasses[i]) { - VNI->id = SplitLRs[eq-1]->getNumValNums(); - SplitLRs[eq-1]->valnos.push_back(VNI); - } else { - VNI->id = j; - LR.valnos[j++] = VNI; - } - } - LR.valnos.resize(j); -} - void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], MachineRegisterInfo &MRI) { // Rewrite instructions. 
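[Editor's note] The DistributeRange template deleted above is not simply dropped: the new '#include "LiveRangeUtils.h"' added earlier in this file suggests it now lives there as a shared utility. As a reading aid, here is a self-contained sketch that mirrors the deleted body with plain structs (Segment, VNInfo, and Range are simplified stand-ins for the LLVM types), showing how segments and value numbers of equivalence class 0 are compacted in place while the others migrate to the split ranges.

#include <cassert>
#include <cstdio>
#include <vector>

// Simplified stand-ins for llvm::VNInfo and llvm::LiveRange::Segment.
struct VNInfo {
  unsigned id;
};
struct Segment {
  int start, end;
  VNInfo *valno;
};
struct Range {
  std::vector<Segment> segments;
  std::vector<VNInfo *> valnos;
};

// Mirror of the deleted helper: move every segment and value number whose
// equivalence class is non-zero into SplitRs[class - 1]; class 0 stays in R
// and is compacted in place, so nothing is copied until the first migrant.
static void distributeRange(Range &R, std::vector<Range *> &SplitRs,
                            const std::vector<unsigned> &VNIClasses) {
  // Move segments to their new ranges.
  size_t J = 0, E = R.segments.size();
  while (J != E && VNIClasses[R.segments[J].valno->id] == 0)
    ++J;
  for (size_t I = J; I != E; ++I) {
    Segment &S = R.segments[I];
    if (unsigned Eq = VNIClasses[S.valno->id])
      SplitRs[Eq - 1]->segments.push_back(S);
    else
      R.segments[J++] = S;
  }
  R.segments.resize(J);

  // Transfer value numbers to their new owners and renumber them.
  size_t j = 0, e = R.valnos.size();
  while (j != e && VNIClasses[j] == 0)
    ++j;
  for (size_t i = j; i != e; ++i) {
    VNInfo *VNI = R.valnos[i];
    if (unsigned Eq = VNIClasses[i]) {
      VNI->id = static_cast<unsigned>(SplitRs[Eq - 1]->valnos.size());
      SplitRs[Eq - 1]->valnos.push_back(VNI);
    } else {
      VNI->id = static_cast<unsigned>(j);
      R.valnos[j++] = VNI;
    }
  }
  R.valnos.resize(j);
}

int main() {
  VNInfo V0{0}, V1{1};
  Range R{{{0, 5, &V0}, {5, 9, &V1}}, {&V0, &V1}};
  Range Split;
  std::vector<Range *> SplitRs{&Split};
  distributeRange(R, SplitRs, {0, 1}); // value number 1 moves to the new range
  assert(R.segments.size() == 1 && Split.segments.size() == 1);
  std::printf("kept %zu segment(s), split off %zu\n", R.segments.size(),
              Split.segments.size());
  return 0;
}

The Distribute() hunks that follow feed this helper a per-value-number class vector computed by ConnectedVNInfoEqClasses, with the new VNI.isUnused() special case keeping dead PHI value numbers in component 0.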
@@ -1453,9 +1196,9 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], // called, but it is not a requirement. SlotIndex Idx; if (MI->isDebugValue()) - Idx = LIS.getSlotIndexes()->getIndexBefore(MI); + Idx = LIS.getSlotIndexes()->getIndexBefore(*MI); else - Idx = LIS.getInstructionIndex(MI); + Idx = LIS.getInstructionIndex(*MI); LiveQueryResult LRQ = LI.Query(Idx); const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); // In the case of an <undef> use that isn't tied to any def, VNI will be @@ -1482,15 +1225,20 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], SubRanges.resize(NumComponents-1, nullptr); for (unsigned I = 0; I < NumValNos; ++I) { const VNInfo &VNI = *SR.valnos[I]; - const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def); - assert(MainRangeVNI != nullptr - && "SubRange def must have corresponding main range def"); - unsigned ComponentNum = getEqClass(MainRangeVNI); - VNIMapping.push_back(ComponentNum); - if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) { - SubRanges[ComponentNum-1] - = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask); + unsigned ComponentNum; + if (VNI.isUnused()) { + ComponentNum = 0; + } else { + const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def); + assert(MainRangeVNI != nullptr + && "SubRange def must have corresponding main range def"); + ComponentNum = getEqClass(MainRangeVNI); + if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) { + SubRanges[ComponentNum-1] + = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask); + } } + VNIMapping.push_back(ComponentNum); } DistributeRange(SR, SubRanges.data(), VNIMapping); } diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index a506e05..5f3281f 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -9,15 +9,13 @@ // // This file implements the LiveInterval analysis pass which is used // by the Linear Scan Register allocator. This pass linearizes the -// basic blocks of the function in DFS order and uses the -// LiveVariables pass to conservatively compute live intervals for +// basic blocks of the function in DFS order and computes live intervals for // each virtual and physical register. 
// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "LiveRangeCalc.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" @@ -38,7 +36,6 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cmath> -#include <limits> using namespace llvm; #define DEBUG_TYPE "regalloc" @@ -48,7 +45,6 @@ char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LiveVariables) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", @@ -77,10 +73,6 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<AAResultsWrapperPass>(); - // LiveVariables isn't really required by this analysis, it is only required - // here to make sure it is live during TwoAddressInstructionPass and - // PHIElimination. This is temporary. - AU.addRequired<LiveVariables>(); AU.addPreserved<LiveVariables>(); AU.addPreservedID(MachineLoopInfoID); AU.addRequiredTransitiveID(MachineDominatorsID); @@ -197,16 +189,9 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); - bool ShouldTrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(LI.reg); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); - LRCalc->calculate(LI, ShouldTrackSubRegLiveness); - bool SeparatedComponents = computeDeadValues(LI, nullptr); - if (SeparatedComponents) { - assert(ShouldTrackSubRegLiveness - && "Separated components should only occur for unused subreg defs"); - SmallVector<LiveInterval*, 8> SplitLIs; - splitSeparateComponents(LI, SplitLIs); - } + LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); + computeDeadValues(LI, nullptr); } void LiveIntervals::computeVirtRegs() { @@ -236,14 +221,18 @@ void LiveIntervals::computeRegMasks() { for (const MachineOperand &MO : MI.operands()) { if (!MO.isRegMask()) continue; - RegMaskSlots.push_back(Indexes->getInstructionIndex(&MI).getRegSlot()); + RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); RegMaskBits.push_back(MO.getRegMask()); } } - // Some block ends, such as funclet returns, create masks. + // Some block ends, such as funclet returns, create masks. Put the mask on + // the last instruction of the block, because MBB slot index intervals are + // half-open. 
if (const uint32_t *Mask = MBB.getEndClobberMask(TRI)) { - RegMaskSlots.push_back(Indexes->getMBBEndIdx(&MBB)); + assert(!MBB.empty() && "empty return block?"); + RegMaskSlots.push_back( + Indexes->getInstructionIndex(MBB.back()).getRegSlot()); RegMaskBits.push_back(Mask); } @@ -439,7 +428,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, MachineInstr *UseMI = &*(I++); if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; - SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot(); LiveQueryResult LRQ = li->Query(Idx); VNInfo *VNI = LRQ.valueIn(); if (!VNI) { @@ -485,13 +474,11 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // Is the register live before? Otherwise we may have to add a read-undef // flag for subregister defs. - bool DeadBeforeDef = false; unsigned VReg = LI.reg; if (MRI->shouldTrackSubRegLiveness(VReg)) { if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) { MachineInstr *MI = getInstructionFromIndex(Def); MI->setRegisterDefReadUndef(VReg); - DeadBeforeDef = true; } } @@ -507,15 +494,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(Def); assert(MI && "No instruction defining live value"); - MI->addRegisterDead(VReg, TRI); - - // If we have a dead def that is completely separate from the rest of - // the liverange then we rewrite it to use a different VReg to not violate - // the rule that the liveness of a virtual register forms a connected - // component. This should only happen if subregister liveness is tracked. - if (DeadBeforeDef) - MayHaveSplitComponents = true; - + MI->addRegisterDead(LI.reg, TRI); if (dead && MI->allDefsAreDead()) { DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); dead->push_back(MI); @@ -547,7 +526,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) continue; } // We only need to visit each instruction once. - SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot(); if (Idx == LastIdx) continue; LastIdx = Idx; @@ -585,9 +564,9 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. 
+ DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); VNI->markUnused(); SR.removeSegment(*Segment); - DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); } } @@ -837,24 +816,22 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { return false; } -float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, - const MachineBlockFrequencyInfo *MBFI, - const MachineInstr *MI) { - BlockFrequency Freq = MBFI->getBlockFreq(MI->getParent()); +float LiveIntervals::getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineInstr &MI) { + BlockFrequency Freq = MBFI->getBlockFreq(MI.getParent()); const float Scale = 1.0f / MBFI->getEntryFreq(); return (isDef + isUse) * (Freq.getFrequency() * Scale); } LiveRange::Segment -LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr* startInst) { +LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) { LiveInterval& Interval = createEmptyInterval(reg); - VNInfo* VN = Interval.getNextValue( - SlotIndex(getInstructionIndex(startInst).getRegSlot()), - getVNInfoAllocator()); - LiveRange::Segment S( - SlotIndex(getInstructionIndex(startInst).getRegSlot()), - getMBBEndIdx(startInst->getParent()), VN); + VNInfo *VN = Interval.getNextValue( + SlotIndex(getInstructionIndex(startInst).getRegSlot()), + getVNInfoAllocator()); + LiveRange::Segment S(SlotIndex(getInstructionIndex(startInst).getRegSlot()), + getMBBEndIdx(startInst.getParent()), VN); Interval.addSegment(S); return S; @@ -962,10 +939,13 @@ public: hasRegMask = true; if (!MO.isReg()) continue; - // Aggressively clear all kill flags. - // They are reinserted by VirtRegRewriter. - if (MO.isUse()) + if (MO.isUse()) { + if (!MO.readsReg()) + continue; + // Aggressively clear all kill flags. + // They are reinserted by VirtRegRewriter. MO.setIsKill(false); + } unsigned Reg = MO.getReg(); if (!Reg) @@ -1021,172 +1001,296 @@ private: } /// Update LR to reflect an instruction has been moved downwards from OldIdx - /// to NewIdx. - /// - /// 1. Live def at OldIdx: - /// Move def to NewIdx, assert endpoint after NewIdx. - /// - /// 2. Live def at OldIdx, killed at NewIdx: - /// Change to dead def at NewIdx. - /// (Happens when bundling def+kill together). - /// - /// 3. Dead def at OldIdx: - /// Move def to NewIdx, possibly across another live value. - /// - /// 4. Def at OldIdx AND at NewIdx: - /// Remove segment [OldIdx;NewIdx) and value defined at OldIdx. - /// (Happens when bundling multiple defs together). - /// - /// 5. Value read at OldIdx, killed before NewIdx: - /// Extend kill to NewIdx. - /// + /// to NewIdx (OldIdx < NewIdx). void handleMoveDown(LiveRange &LR) { - // First look for a kill at OldIdx. - LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); LiveRange::iterator E = LR.end(); - // Is LR even live at OldIdx? - if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + // Segment going into OldIdx. + LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex()); + + // No value live before or after OldIdx? Nothing to do. + if (OldIdxIn == E || SlotIndex::isEarlierInstr(OldIdx, OldIdxIn->start)) return; - // Handle a live-in value. - if (!SlotIndex::isSameInstr(I->start, OldIdx)) { - bool isKill = SlotIndex::isSameInstr(OldIdx, I->end); + LiveRange::iterator OldIdxOut; + // Do we have a value live-in to OldIdx? + if (SlotIndex::isEarlierInstr(OldIdxIn->start, OldIdx)) { // If the live-in value already extends to NewIdx, there is nothing to do. 
- if (!SlotIndex::isEarlierInstr(I->end, NewIdx)) + if (SlotIndex::isEarlierEqualInstr(NewIdx, OldIdxIn->end)) return; // Aggressively remove all kill flags from the old kill point. // Kill flags shouldn't be used while live intervals exist, they will be // reinserted by VirtRegRewriter. - if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end)) - for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) + if (MachineInstr *KillMI = LIS.getInstructionFromIndex(OldIdxIn->end)) + for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO) if (MO->isReg() && MO->isUse()) MO->setIsKill(false); - // Adjust I->end to reach NewIdx. This may temporarily make LR invalid by - // overlapping ranges. Case 5 above. - I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); - // If this was a kill, there may also be a def. Otherwise we're done. + + // Is there a def before NewIdx which is not OldIdx? + LiveRange::iterator Next = std::next(OldIdxIn); + if (Next != E && !SlotIndex::isSameInstr(OldIdx, Next->start) && + SlotIndex::isEarlierInstr(Next->start, NewIdx)) { + // If we are here then OldIdx was just a use but not a def. We only have + // to ensure liveness extends to NewIdx. + LiveRange::iterator NewIdxIn = + LR.advanceTo(Next, NewIdx.getBaseIndex()); + // Extend the segment before NewIdx if necessary. + if (NewIdxIn == E || + !SlotIndex::isEarlierInstr(NewIdxIn->start, NewIdx)) { + LiveRange::iterator Prev = std::prev(NewIdxIn); + Prev->end = NewIdx.getRegSlot(); + } + return; + } + + // Adjust OldIdxIn->end to reach NewIdx. This may temporarily make LR + // invalid by overlapping ranges. + bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end); + OldIdxIn->end = NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber()); + // If this was not a kill, then there was no def and we're done. if (!isKill) return; - ++I; + + // Did we have a Def at OldIdx? + OldIdxOut = Next; + if (OldIdxOut == E || !SlotIndex::isSameInstr(OldIdx, OldIdxOut->start)) + return; + } else { + OldIdxOut = OldIdxIn; } - // Check for a def at OldIdx. - if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start)) - return; - // We have a def at OldIdx. - VNInfo *DefVNI = I->valno; - assert(DefVNI->def == I->start && "Inconsistent def"); - DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); - // If the defined value extends beyond NewIdx, just move the def down. - // This is case 1 above. - if (SlotIndex::isEarlierInstr(NewIdx, I->end)) { - I->start = DefVNI->def; + // If we are here then there is a Definition at OldIdx. OldIdxOut points + // to the segment starting there. + assert(OldIdxOut != E && SlotIndex::isSameInstr(OldIdx, OldIdxOut->start) && + "No def?"); + VNInfo *OldIdxVNI = OldIdxOut->valno; + assert(OldIdxVNI->def == OldIdxOut->start && "Inconsistent def"); + + // If the defined value extends beyond NewIdx, just move the beginning + // of the segment to NewIdx. + SlotIndex NewIdxDef = NewIdx.getRegSlot(OldIdxOut->start.isEarlyClobber()); + if (SlotIndex::isEarlierInstr(NewIdxDef, OldIdxOut->end)) { + OldIdxVNI->def = NewIdxDef; + OldIdxOut->start = OldIdxVNI->def; return; } - // The remaining possibilities are now: - // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx). - // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot(). - // In either case, it is possible that there is an existing def at NewIdx. 
- assert((I->end == OldIdx.getDeadSlot() ||
- SlotIndex::isSameInstr(I->end, NewIdx)) &&
- "Cannot move def below kill");
- LiveRange::iterator NewI = LR.advanceTo(I, NewIdx.getRegSlot());
- if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
- // There is an existing def at NewIdx, case 4 above. The def at OldIdx is
- // coalesced into that value.
- assert(NewI->valno != DefVNI && "Multiple defs of value?");
- LR.removeValNo(DefVNI);
+
+ // If we are here then we have a Definition at OldIdx which ends before
+ // NewIdx.
+
+ // Is there an existing Def at NewIdx?
+ LiveRange::iterator AfterNewIdx
+ = LR.advanceTo(OldIdxOut, NewIdx.getRegSlot());
+ bool OldIdxDefIsDead = OldIdxOut->end.isDead();
+ if (!OldIdxDefIsDead &&
+ SlotIndex::isEarlierInstr(OldIdxOut->end, NewIdxDef)) {
+ // OldIdx is not a dead def, and NewIdxDef is inside a new interval.
+ VNInfo *DefVNI;
+ if (OldIdxOut != LR.begin() &&
+ !SlotIndex::isEarlierInstr(std::prev(OldIdxOut)->end,
+ OldIdxOut->start)) {
+ // There is no gap between OldIdxOut and its predecessor anymore, so
+ // merge them.
+ LiveRange::iterator IPrev = std::prev(OldIdxOut);
+ DefVNI = OldIdxVNI;
+ IPrev->end = OldIdxOut->end;
+ } else {
+ // The value is live-in to OldIdx.
+ LiveRange::iterator INext = std::next(OldIdxOut);
+ assert(INext != E && "Must have following segment");
+ // We merge OldIdxOut and its successor. As we're dealing with subreg
+ // reordering, there is always a successor to OldIdxOut in the same BB.
+ // We don't need INext->valno anymore and will reuse it for the new
+ // segment we create later.
+ DefVNI = OldIdxVNI;
+ INext->start = OldIdxOut->end;
+ INext->valno->def = INext->start;
+ }
+ // If NewIdx is behind the last segment, extend that and append a new one.
+ if (AfterNewIdx == E) {
+ // OldIdxOut is undef at this point; slide (OldIdxOut;AfterNewIdx] up
+ // one position.
+ // |- ?/OldIdxOut -| |- X0 -| ... |- Xn -| end
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- undef/NewS -| end
+ std::copy(std::next(OldIdxOut), E, OldIdxOut);
+ // The last segment is undefined now; reuse it for a dead def.
+ LiveRange::iterator NewSegment = std::prev(E);
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ DefVNI);
+ DefVNI->def = NewIdxDef;
+
+ LiveRange::iterator Prev = std::prev(NewSegment);
+ Prev->end = NewIdxDef;
+ } else {
+ // OldIdxOut is undef at this point; slide (OldIdxOut;AfterNewIdx] up
+ // one position.
+ // |- ?/OldIdxOut -| |- X0 -| ... |- Xn/AfterNewIdx -| |- Next -|
+ // => |- X0/OldIdxOut -| ... |- Xn -| |- Xn/AfterNewIdx -| |- Next -|
+ std::copy(std::next(OldIdxOut), std::next(AfterNewIdx), OldIdxOut);
+ LiveRange::iterator Prev = std::prev(AfterNewIdx);
+ // We have two cases:
+ if (SlotIndex::isEarlierInstr(Prev->start, NewIdxDef)) {
+ // Case 1: NewIdx is inside a liverange. Split this liverange at
+ // NewIdxDef into the segment "Prev" followed by "NewSegment".
+ LiveRange::iterator NewSegment = AfterNewIdx;
+ *NewSegment = LiveRange::Segment(NewIdxDef, Prev->end, Prev->valno);
+ Prev->valno->def = NewIdxDef;
+
+ *Prev = LiveRange::Segment(Prev->start, NewIdxDef, DefVNI);
+ DefVNI->def = Prev->start;
+ } else {
+ // Case 2: NewIdx is in a lifetime hole. Keep AfterNewIdx as is and
+ // turn Prev into a segment from NewIdx to AfterNewIdx->start.
+ *Prev = LiveRange::Segment(NewIdxDef, AfterNewIdx->start, DefVNI);
+ DefVNI->def = NewIdxDef;
+ assert(DefVNI != AfterNewIdx->valno);
+ }
+ }
 return;
 }
- // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
- // If the def at OldIdx was dead, we allow it to be moved across other LR - // values. The new range should be placed immediately before NewI, move any - // intermediate ranges up. - assert(NewI != I && "Inconsistent iterators"); - std::copy(std::next(I), NewI, I); - *std::prev(NewI) - = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + + if (AfterNewIdx != E && + SlotIndex::isSameInstr(AfterNewIdx->start, NewIdxDef)) { + // There is an existing def at NewIdx. The def at OldIdx is coalesced into + // that value. + assert(AfterNewIdx->valno != OldIdxVNI && "Multiple defs of value?"); + LR.removeValNo(OldIdxVNI); + } else { + // There was no existing def at NewIdx. We need to create a dead def + // at NewIdx. Shift segments over the old OldIdxOut segment, this frees + // a new segment at the place where we want to construct the dead def. + // |- OldIdxOut -| |- X0 -| ... |- Xn -| |- AfterNewIdx -| + // => |- X0/OldIdxOut -| ... |- Xn -| |- undef/NewS. -| |- AfterNewIdx -| + assert(AfterNewIdx != OldIdxOut && "Inconsistent iterators"); + std::copy(std::next(OldIdxOut), AfterNewIdx, OldIdxOut); + // We can reuse OldIdxVNI now. + LiveRange::iterator NewSegment = std::prev(AfterNewIdx); + VNInfo *NewSegmentVNI = OldIdxVNI; + NewSegmentVNI->def = NewIdxDef; + *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(), + NewSegmentVNI); + } } /// Update LR to reflect an instruction has been moved upwards from OldIdx - /// to NewIdx. - /// - /// 1. Live def at OldIdx: - /// Hoist def to NewIdx. - /// - /// 2. Dead def at OldIdx: - /// Hoist def+end to NewIdx, possibly move across other values. - /// - /// 3. Dead def at OldIdx AND existing def at NewIdx: - /// Remove value defined at OldIdx, coalescing it with existing value. - /// - /// 4. Live def at OldIdx AND existing def at NewIdx: - /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx. - /// (Happens when bundling multiple defs together). - /// - /// 5. Value killed at OldIdx: - /// Hoist kill to NewIdx, then scan for last kill between NewIdx and - /// OldIdx. - /// + /// to NewIdx (NewIdx < OldIdx). void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { - // First look for a kill at OldIdx. - LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); LiveRange::iterator E = LR.end(); - // Is LR even live at OldIdx? - if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + // Segment going into OldIdx. + LiveRange::iterator OldIdxIn = LR.find(OldIdx.getBaseIndex()); + + // No value live before or after OldIdx? Nothing to do. + if (OldIdxIn == E || SlotIndex::isEarlierInstr(OldIdx, OldIdxIn->start)) return; - // Handle a live-in value. - if (!SlotIndex::isSameInstr(I->start, OldIdx)) { - // If the live-in value isn't killed here, there is nothing to do. - if (!SlotIndex::isSameInstr(OldIdx, I->end)) - return; - // Adjust I->end to end at NewIdx. If we are hoisting a kill above - // another use, we need to search for that use. Case 5 above. - I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); - ++I; - // If OldIdx also defines a value, there couldn't have been another use. - if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { - // No def, search for the new kill. - // This can never be an early clobber kill since there is no def. - std::prev(I)->end = findLastUseBefore(Reg, LaneMask).getRegSlot(); + LiveRange::iterator OldIdxOut; + // Do we have a value live-in to OldIdx? 
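Both handleMoveDown paths above free a segment slot with the same idiom: std::copy slides the following segments up one position, leaving the slot at the far end free to be rebuilt as the new (dead) def. A self-contained sketch of that slide on a plain vector, with ints standing in for LiveRange::Segment:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  // Segs[0] plays the role of the now-unneeded OldIdxOut segment.
  std::vector<int> Segs = {10, 20, 30, 40};
  // Slide the tail up one position: |- dead -||- 20 -||- 30 -||- 40 -|
  // becomes |- 20 -||- 30 -||- 40 -||- reusable -|.
  std::copy(std::next(Segs.begin()), Segs.end(), Segs.begin());
  Segs.back() = 99; // Rebuild the freed slot as the "new" segment.
  for (int S : Segs)
    std::cout << S << ' '; // 20 30 40 99
}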
+ if (SlotIndex::isEarlierInstr(OldIdxIn->start, OldIdx)) { + // If the live-in value isn't killed here, then we have no Def at + // OldIdx, moreover the value must be live at NewIdx so there is nothing + // to do. + bool isKill = SlotIndex::isSameInstr(OldIdx, OldIdxIn->end); + if (!isKill) return; - } - } - // Now deal with the def at OldIdx. - assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?"); - VNInfo *DefVNI = I->valno; - assert(DefVNI->def == I->start && "Inconsistent def"); - DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); - - // Check for an existing def at NewIdx. - LiveRange::iterator NewI = LR.find(NewIdx.getRegSlot()); - if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { - assert(NewI->valno != DefVNI && "Same value defined more than once?"); - // There is an existing def at NewIdx. - if (I->end.isDead()) { - // Case 3: Remove the dead def at OldIdx. - LR.removeValNo(DefVNI); + // At this point we have to move OldIdxIn->end back to the nearest + // previous use or (dead-)def but no further than NewIdx. + SlotIndex DefBeforeOldIdx + = std::max(OldIdxIn->start.getDeadSlot(), + NewIdx.getRegSlot(OldIdxIn->end.isEarlyClobber())); + OldIdxIn->end = findLastUseBefore(DefBeforeOldIdx, Reg, LaneMask); + + // Did we have a Def at OldIdx? If not we are done now. + OldIdxOut = std::next(OldIdxIn); + if (OldIdxOut == E || !SlotIndex::isSameInstr(OldIdx, OldIdxOut->start)) return; - } - // Case 4: Replace def at NewIdx with live def at OldIdx. - I->start = DefVNI->def; - LR.removeValNo(NewI->valno); - return; + } else { + OldIdxOut = OldIdxIn; + OldIdxIn = OldIdxOut != LR.begin() ? std::prev(OldIdxOut) : E; } - // There is no existing def at NewIdx. Hoist DefVNI. - if (!I->end.isDead()) { - // Leave the end point of a live def. - I->start = DefVNI->def; - return; + // If we are here then there is a Definition at OldIdx. OldIdxOut points + // to the segment starting there. + assert(OldIdxOut != E && SlotIndex::isSameInstr(OldIdx, OldIdxOut->start) && + "No def?"); + VNInfo *OldIdxVNI = OldIdxOut->valno; + assert(OldIdxVNI->def == OldIdxOut->start && "Inconsistent def"); + bool OldIdxDefIsDead = OldIdxOut->end.isDead(); + + // Is there an existing def at NewIdx? + SlotIndex NewIdxDef = NewIdx.getRegSlot(OldIdxOut->start.isEarlyClobber()); + LiveRange::iterator NewIdxOut = LR.find(NewIdx.getRegSlot()); + if (SlotIndex::isSameInstr(NewIdxOut->start, NewIdx)) { + assert(NewIdxOut->valno != OldIdxVNI && + "Same value defined more than once?"); + // If OldIdx was a dead def remove it. + if (!OldIdxDefIsDead) { + // Remove segment starting at NewIdx and move begin of OldIdxOut to + // NewIdx so it can take its place. + OldIdxVNI->def = NewIdxDef; + OldIdxOut->start = NewIdxDef; + LR.removeValNo(NewIdxOut->valno); + } else { + // Simply remove the dead def at OldIdx. + LR.removeValNo(OldIdxVNI); + } + } else { + // Previously nothing was live after NewIdx, so all we have to do now is + // move the begin of OldIdxOut to NewIdx. + if (!OldIdxDefIsDead) { + // Do we have any intermediate Defs between OldIdx and NewIdx? + if (OldIdxIn != E && + SlotIndex::isEarlierInstr(NewIdxDef, OldIdxIn->start)) { + // OldIdx is not a dead def and NewIdx is before predecessor start. + LiveRange::iterator NewIdxIn = NewIdxOut; + assert(NewIdxIn == LR.find(NewIdx.getBaseIndex())); + const SlotIndex SplitPos = NewIdxDef; + + // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut. 
+ *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
+ OldIdxIn->valno);
+ // OldIdxIn and OldIdxVNI are now undef and can be overridden.
+ // We slide [NewIdxIn, OldIdxIn) down one position.
+ // |- X0/NewIdxIn -| ... |- Xn-1 -||- Xn/OldIdxIn -||- OldIdxOut -|
+ // => |- undef/NewIdxIn -| |- X0 -| ... |- Xn-1 -| |- Xn/OldIdxOut -|
+ std::copy_backward(NewIdxIn, OldIdxIn, OldIdxOut);
+ // NewIdxIn is now considered undef so we can reuse it for the moved
+ // value.
+ LiveRange::iterator NewSegment = NewIdxIn;
+ LiveRange::iterator Next = std::next(NewSegment);
+ if (SlotIndex::isEarlierInstr(Next->start, NewIdx)) {
+ // There is no gap between NewSegment and its predecessor.
+ *NewSegment = LiveRange::Segment(Next->start, SplitPos,
+ Next->valno);
+ *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI);
+ Next->valno->def = SplitPos;
+ } else {
+ // There is a gap between NewSegment and its predecessor.
+ // Value becomes live-in.
+ *NewSegment = LiveRange::Segment(SplitPos, Next->start, OldIdxVNI);
+ NewSegment->valno->def = SplitPos;
+ }
+ } else {
+ // Leave the end point of a live def.
+ OldIdxOut->start = NewIdxDef;
+ OldIdxVNI->def = NewIdxDef;
+ if (OldIdxIn != E && SlotIndex::isEarlierInstr(NewIdx, OldIdxIn->end))
+ OldIdxIn->end = NewIdx.getRegSlot();
+ }
+ } else {
+ // OldIdxVNI is a dead def. It may have been moved across other values
+ // in LR, so move OldIdxOut up to NewIdxOut. Slide [NewIdxOut;OldIdxOut)
+ // down one position.
+ // |- X0/NewIdxOut -| ... |- Xn-1 -| |- Xn/OldIdxOut -| |- next -|
+ // => |- undef/NewIdxOut -| |- X0 -| ... |- Xn-1 -| |- next -|
+ std::copy_backward(NewIdxOut, OldIdxOut, std::next(OldIdxOut));
+ // OldIdxVNI can be reused now to build a new dead def segment.
+ LiveRange::iterator NewSegment = NewIdxOut;
+ VNInfo *NewSegmentVNI = OldIdxVNI;
+ *NewSegment = LiveRange::Segment(NewIdxDef, NewIdxDef.getDeadSlot(),
+ NewSegmentVNI);
+ NewSegmentVNI->def = NewIdxDef;
+ }
 }
-
- // DefVNI is a dead def. It may have been moved across other values in LR,
- // so move I up to NewI. Slide [NewI;I) down one position.
- std::copy_backward(NewI, I, std::next(I));
- *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
 }

 void updateRegMaskSlots() {
@@ -1205,29 +1309,31 @@ private:
 }

 // Return the last use of reg between NewIdx and OldIdx.
- SlotIndex findLastUseBefore(unsigned Reg, LaneBitmask LaneMask) {
-
+ SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg,
+ LaneBitmask LaneMask) {
 if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- SlotIndex LastUse = NewIdx;
+ SlotIndex LastUse = Before;
 for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
+ if (MO.isUndef())
+ continue;
 unsigned SubReg = MO.getSubReg();
 if (SubReg != 0 && LaneMask != 0 &&
 (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0)
 continue;
- const MachineInstr *MI = MO.getParent();
+ const MachineInstr &MI = *MO.getParent();
 SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
 if (InstSlot > LastUse && InstSlot < OldIdx)
- LastUse = InstSlot;
+ LastUse = InstSlot.getRegSlot();
 }
 return LastUse;
 }

 // This is a regunit interval, so scanning the use list could be very
 // expensive. Scan upwards from OldIdx instead.
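handleMoveUp above uses the mirror idiom: std::copy_backward slides a prefix of segments down one position so the slot at the front can be reused for the hoisted def. The same slide on a plain vector, ints again standing in for segments:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  // Segs[3] plays the role of the dead def being hoisted to the front.
  std::vector<int> Segs = {10, 20, 30, 40};
  // Slide [0,3) down one position so slot 0 becomes reusable:
  // |- 10 -||- 20 -||- 30 -||- dead -| => |- free -||- 10 -||- 20 -||- 30 -|
  std::copy_backward(Segs.begin(), Segs.begin() + 3, Segs.end());
  Segs.front() = 5; // Rebuild the freed slot as the hoisted dead def.
  for (int S : Segs)
    std::cout << S << ' '; // 5 10 20 30
}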
- assert(NewIdx < OldIdx && "Expected upwards move"); + assert(Before < OldIdx && "Expected upwards move"); SlotIndexes *Indexes = LIS.getSlotIndexes(); - MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(Before); // OldIdx may not correspond to an instruction any longer, so set MII to // point to the next instruction after OldIdx, or MBB->end(). @@ -1241,44 +1347,44 @@ private: while (MII != Begin) { if ((--MII)->isDebugValue()) continue; - SlotIndex Idx = Indexes->getInstructionIndex(MII); + SlotIndex Idx = Indexes->getInstructionIndex(*MII); - // Stop searching when NewIdx is reached. - if (!SlotIndex::isEarlierInstr(NewIdx, Idx)) - return NewIdx; + // Stop searching when Before is reached. + if (!SlotIndex::isEarlierInstr(Before, Idx)) + return Before; // Check if MII uses Reg. - for (MIBundleOperands MO(MII); MO.isValid(); ++MO) - if (MO->isReg() && + for (MIBundleOperands MO(*MII); MO.isValid(); ++MO) + if (MO->isReg() && !MO->isUndef() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()) && TRI.hasRegUnit(MO->getReg(), Reg)) - return Idx; + return Idx.getRegSlot(); } - // Didn't reach NewIdx. It must be the first instruction in the block. - return NewIdx; + // Didn't reach Before. It must be the first instruction in the block. + return Before; } }; -void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) { - assert(!MI->isBundled() && "Can't handle bundled instructions yet."); +void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) { + assert(!MI.isBundled() && "Can't handle bundled instructions yet."); SlotIndex OldIndex = Indexes->getInstructionIndex(MI); Indexes->removeMachineInstrFromMaps(MI); SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI); - assert(getMBBStartIdx(MI->getParent()) <= OldIndex && - OldIndex < getMBBEndIdx(MI->getParent()) && + assert(getMBBStartIdx(MI.getParent()) <= OldIndex && + OldIndex < getMBBEndIdx(MI.getParent()) && "Cannot handle moves across basic block boundaries."); HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); - HME.updateAllRanges(MI); + HME.updateAllRanges(&MI); } -void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, - MachineInstr* BundleStart, +void LiveIntervals::handleMoveIntoBundle(MachineInstr &MI, + MachineInstr &BundleStart, bool UpdateFlags) { SlotIndex OldIndex = Indexes->getInstructionIndex(MI); SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart); HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); - HME.updateAllRanges(MI); + HME.updateAllRanges(&MI); } void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, @@ -1295,8 +1401,8 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; - MachineInstr *MI = I; - if (MI->isDebugValue()) + MachineInstr &MI = *I; + if (MI.isDebugValue()) continue; SlotIndex instrIdx = getInstructionIndex(MI); @@ -1305,8 +1411,9 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, // FIXME: This doesn't currently handle early-clobber or multiple removed // defs inside of the region to repair. 
- for (MachineInstr::mop_iterator OI = MI->operands_begin(), - OE = MI->operands_end(); OI != OE; ++OI) { + for (MachineInstr::mop_iterator OI = MI.operands_begin(), + OE = MI.operands_end(); + OI != OE; ++OI) { const MachineOperand &MO = *OI; if (!MO.isReg() || MO.getReg() != Reg) continue; @@ -1376,26 +1483,27 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, ArrayRef<unsigned> OrigRegs) { // Find anchor points, which are at the beginning/end of blocks or at // instructions that already have indexes. - while (Begin != MBB->begin() && !Indexes->hasIndex(Begin)) + while (Begin != MBB->begin() && !Indexes->hasIndex(*Begin)) --Begin; - while (End != MBB->end() && !Indexes->hasIndex(End)) + while (End != MBB->end() && !Indexes->hasIndex(*End)) ++End; SlotIndex endIdx; if (End == MBB->end()) endIdx = getMBBEndIdx(MBB).getPrevSlot(); else - endIdx = getInstructionIndex(End); + endIdx = getInstructionIndex(*End); Indexes->repairIndexesInRange(MBB, Begin, End); for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; - MachineInstr *MI = I; - if (MI->isDebugValue()) + MachineInstr &MI = *I; + if (MI.isDebugValue()) continue; - for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { + for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), + MOE = MI.operands_end(); + MOI != MOE; ++MOI) { if (MOI->isReg() && TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && !hasInterval(MOI->getReg())) { @@ -1459,3 +1567,9 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI, } ConEQ.Distribute(LI, SplitLIs.data(), *MRI); } + +void LiveIntervals::constructMainRangeFromSubranges(LiveInterval &LI) { + assert(LRCalc && "LRCalc not initialized."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + LRCalc->constructMainRangeFromSubranges(LI); +} diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp index efbbcbe..4e2528f 100644 --- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -43,7 +44,7 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO, /// Remove Defs, add uses. This is the recommended way of calculating liveness. void LivePhysRegs::stepBackward(const MachineInstr &MI) { // Remove defined registers and regmask kills from the set. - for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { if (!O->isDef()) continue; @@ -56,8 +57,8 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { } // Add uses to the set. - for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { - if (!O->isReg() || !O->readsReg() || O->isUndef()) + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg()) continue; unsigned Reg = O->getReg(); if (Reg == 0) @@ -73,7 +74,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { void LivePhysRegs::stepForward(const MachineInstr &MI, SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) { // Remove killed registers from the set. 
- for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { unsigned Reg = O->getReg(); if (Reg == 0) @@ -120,12 +121,25 @@ void LivePhysRegs::print(raw_ostream &OS) const { } /// Dumps the currently live registers to the debug output. -void LivePhysRegs::dump() const { +LLVM_DUMP_METHOD void LivePhysRegs::dump() const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << " " << *this; #endif } +bool LivePhysRegs::available(const MachineRegisterInfo &MRI, + unsigned Reg) const { + if (LiveRegs.count(Reg)) + return false; + if (MRI.isReserved(Reg)) + return false; + for (MCRegAliasIterator R(Reg, TRI, false); R.isValid(); ++R) { + if (LiveRegs.count(*R)) + return false; + } + return true; +} + /// Add live-in registers of basic block \p MBB to \p LiveRegs. static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { for (const auto &LI : MBB.liveins()) @@ -135,40 +149,41 @@ static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { /// Add pristine registers to the given \p LiveRegs. This function removes /// actually saved callee save registers when \p InPrologueEpilogue is false. static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, + const MachineFrameInfo &MFI, const TargetRegisterInfo &TRI) { - const MachineFrameInfo &MFI = *MF.getFrameInfo(); - if (!MFI.isCalleeSavedInfoValid()) - return; - for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) LiveRegs.addReg(*CSR); for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) LiveRegs.removeReg(Info.getReg()); } -void LivePhysRegs::addLiveOuts(const MachineBasicBlock *MBB, - bool AddPristinesAndCSRs) { - if (AddPristinesAndCSRs) { - const MachineFunction &MF = *MBB->getParent(); - addPristines(*this, MF, *TRI); - if (!MBB->isReturnBlock()) { +void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { + // To get the live-outs we simply merge the live-ins of all successors. + for (const MachineBasicBlock *Succ : MBB.successors()) + ::addLiveIns(*this, *Succ); +} + +void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { + const MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) { + if (MBB.isReturnBlock()) { // The return block has no successors whose live-ins we could merge // below. So instead we add the callee saved registers manually. for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) addReg(*I); + } else { + addPristines(*this, MF, MFI, *TRI); } } - // To get the live-outs we simply merge the live-ins of all successors. 
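The new LivePhysRegs::available() above deliberately checks aliases as well: a physical register is usable only if neither it nor any overlapping register is live, and it is not reserved. A simplified model of that check, with std::set and a plain alias list standing in for the live set and MCRegAliasIterator; all register numbers are hypothetical:

#include <iostream>
#include <set>
#include <vector>

using Reg = unsigned;

// Mirror of the available() logic: not live, not reserved, no live alias.
static bool available(const std::set<Reg> &Live, const std::set<Reg> &Reserved,
                      Reg R, const std::vector<Reg> &Aliases) {
  if (Live.count(R) || Reserved.count(R))
    return false;
  for (Reg A : Aliases) // MCRegAliasIterator walks these in the real code.
    if (Live.count(A))
      return false;
  return true;
}

int main() {
  std::set<Reg> Live = {1};     // register 1 is live
  std::set<Reg> Reserved = {7}; // register 7 is reserved (e.g. SP)
  std::cout << available(Live, Reserved, 2, {1}) << '\n'; // 0: alias is live
  std::cout << available(Live, Reserved, 3, {4}) << '\n'; // 1: free
}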
- for (const MachineBasicBlock *Succ : MBB->successors()) - ::addLiveIns(*this, *Succ); + addLiveOutsNoPristines(MBB); } -void LivePhysRegs::addLiveIns(const MachineBasicBlock *MBB, - bool AddPristines) { - if (AddPristines) { - const MachineFunction &MF = *MBB->getParent(); - addPristines(*this, MF, *TRI); - } - ::addLiveIns(*this, *MBB); +void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) { + const MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) + addPristines(*this, MF, MFI, *TRI); + ::addLiveIns(*this, MBB); } diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index c408615..db91ca1 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -42,12 +42,12 @@ void LiveRangeCalc::reset(const MachineFunction *mf, static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc, LiveRange &LR, const MachineOperand &MO) { - const MachineInstr *MI = MO.getParent(); - SlotIndex DefIdx = - Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber()); + const MachineInstr &MI = *MO.getParent(); + SlotIndex DefIdx = + Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber()); - // Create the def in LR. This may find an existing def. - LR.createDeadDef(DefIdx, Alloc); + // Create the def in LR. This may find an existing def. + LR.createDeadDef(DefIdx, Alloc); } void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { @@ -120,13 +120,29 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { extendToUses(S, Reg, S.LaneMask); } LI.clear(); - LI.constructMainRangeFromSubranges(*Indexes, *Alloc); + constructMainRangeFromSubranges(LI); } else { resetLiveOutMap(); extendToUses(LI, Reg, ~0u); } } +void LiveRangeCalc::constructMainRangeFromSubranges(LiveInterval &LI) { + // First create dead defs at all defs found in subranges. + LiveRange &MainRange = LI; + assert(MainRange.segments.empty() && MainRange.valnos.empty() && + "Expect empty main liverange"); + + for (const LiveInterval::SubRange &SR : LI.subranges()) { + for (const VNInfo *VNI : SR.valnos) { + if (!VNI->isUnused() && !VNI->isPHIDef()) + MainRange.createDeadDef(VNI->def, *Alloc); + } + } + + resetLiveOutMap(); + extendToUses(MainRange, LI.reg); +} void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); @@ -184,7 +200,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, // had an early-clobber flag. isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber(); } - UseIdx = Indexes->getInstructionIndex(MI).getRegSlot(isEarlyClobber); + UseIdx = Indexes->getInstructionIndex(*MI).getRegSlot(isEarlyClobber); } // MI is reading Reg. We may have visited MI before if it happens to be diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index ff38c68..9de48b7 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -189,6 +189,11 @@ public: /// enabled. void calculate(LiveInterval &LI, bool TrackSubRegs); + /// For live interval \p LI with correct SubRanges construct matching + /// information for the main live range. Expects the main live range to not + /// have any segments or value numbers. + void constructMainRangeFromSubranges(LiveInterval &LI); + //===--------------------------------------------------------------------===// // Low-level interface. 
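constructMainRangeFromSubranges above seeds the main range with one dead def per def point found in any subrange, then re-extends liveness to the uses. The seeding step is just a union of the per-lane def points; a minimal sketch with plain unsigned values standing in for SlotIndexes:

#include <iostream>
#include <set>
#include <vector>

int main() {
  // Hypothetical def points (SlotIndex values) of two lane subranges.
  std::vector<std::set<unsigned>> SubRangeDefs = {{4, 20}, {4, 36}};
  std::set<unsigned> MainRangeDefs;
  for (const auto &SR : SubRangeDefs)
    MainRangeDefs.insert(SR.begin(), SR.end());
  // One dead def is seeded per distinct def point; extendToUses then grows
  // the segments to cover the actual uses.
  for (unsigned D : MainRangeDefs)
    std::cout << D << ' '; // 4 20 36
}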
//===--------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index 5ce364a..b35c0ad 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -53,7 +53,7 @@ bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); ScannedRemattable = true; - if (!TII.isTriviallyReMaterializable(DefMI, aa)) + if (!TII.isTriviallyReMaterializable(*DefMI, aa)) return false; Remattable.insert(VNI); return true; @@ -63,10 +63,13 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { for (VNInfo *VNI : getParent().valnos) { if (VNI->isUnused()) continue; - MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); + unsigned Original = VRM->getOriginal(getReg()); + LiveInterval &OrigLI = LIS.getInterval(Original); + VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def); if (!DefMI) continue; - checkRematerializable(VNI, DefMI, aa); + checkRematerializable(OrigVNI, DefMI, aa); } ScannedRemattable = true; } @@ -113,27 +116,21 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, return true; } -bool LiveRangeEdit::canRematerializeAt(Remat &RM, - SlotIndex UseIdx, - bool cheapAsAMove) { +bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI, + SlotIndex UseIdx, bool cheapAsAMove) { assert(ScannedRemattable && "Call anyRematerializable first"); // Use scanRemattable info. - if (!Remattable.count(RM.ParentVNI)) + if (!Remattable.count(OrigVNI)) return false; // No defining instruction provided. SlotIndex DefIdx; - if (RM.OrigMI) - DefIdx = LIS.getInstructionIndex(RM.OrigMI); - else { - DefIdx = RM.ParentVNI->def; - RM.OrigMI = LIS.getInstructionFromIndex(DefIdx); - assert(RM.OrigMI && "No defining instruction for remattable value"); - } + assert(RM.OrigMI && "No defining instruction for remattable value"); + DefIdx = LIS.getInstructionIndex(*RM.OrigMI); // If only cheap remats were requested, bail out early. - if (cheapAsAMove && !TII.isAsCheapAsAMove(RM.OrigMI)) + if (cheapAsAMove && !TII.isAsCheapAsAMove(*RM.OrigMI)) return false; // Verify that all used registers are available with the same values. @@ -150,10 +147,13 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, const TargetRegisterInfo &tri, bool Late) { assert(RM.OrigMI && "Invalid remat"); - TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); + TII.reMaterialize(MBB, MI, DestReg, 0, *RM.OrigMI, tri); + // DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg + // to false anyway in case the isDead flag of RM.OrigMI's dest register + // is true. + (*--MI).getOperand(0).setIsDead(false); Rematted.insert(RM.ParentVNI); - return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) - .getRegSlot(); + return LIS.getSlotIndexes()->insertMachineInstrInMaps(*MI, Late).getRegSlot(); } void LiveRangeEdit::eraseVirtReg(unsigned Reg) { @@ -188,9 +188,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, // Since we're moving the DefMI load, make sure we're not extending any live // ranges. - if (!allUsesAvailableAt(DefMI, - LIS.getInstructionIndex(DefMI), - LIS.getInstructionIndex(UseMI))) + if (!allUsesAvailableAt(DefMI, LIS.getInstructionIndex(*DefMI), + LIS.getInstructionIndex(*UseMI))) return false; // We also need to make sure it is safe to move the load. 
@@ -206,11 +205,11 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) return false; - MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI); + MachineInstr *FoldMI = TII.foldMemoryOperand(*UseMI, Ops, *DefMI, &LIS); if (!FoldMI) return false; DEBUG(dbgs() << " folded: " << *FoldMI); - LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI); + LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI); UseMI->eraseFromParent(); DefMI->addRegisterDead(LI->reg, nullptr); Dead.push_back(DefMI); @@ -220,7 +219,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, bool LiveRangeEdit::useIsKill(const LiveInterval &LI, const MachineOperand &MO) const { - const MachineInstr *MI = MO.getParent(); + const MachineInstr &MI = *MO.getParent(); SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); if (LI.Query(Idx).isKill()) return true; @@ -235,9 +234,10 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI, } /// Find all live intervals that need to shrink, then remove the instruction. -void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { +void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, + AliasAnalysis *AA) { assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot(); // Never delete a bundled instruction. if (MI->isBundled()) { @@ -261,6 +261,20 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { // Collect virtual registers to be erased after MI is gone. SmallVector<unsigned, 8> RegsToErase; bool ReadsPhysRegs = false; + bool isOrigDef = false; + unsigned Dest; + if (VRM && MI->getOperand(0).isReg()) { + Dest = MI->getOperand(0).getReg(); + unsigned Original = VRM->getOriginal(Dest); + LiveInterval &OrigLI = LIS.getInterval(Original); + VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx); + // The original live-range may have been shrunk to + // an empty live-range. It happens when it is dead, but + // we still keep it around to be able to rematerialize + // other values that depend on it. + if (OrigVNI) + isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx); + } // Check for live intervals that may shrink for (MachineInstr::mop_iterator MOI = MI->operands_begin(), @@ -314,11 +328,27 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { } DEBUG(dbgs() << "Converted physregs to:\t" << *MI); } else { - if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + // If the dest of MI is an original reg and MI is reMaterializable, + // don't delete the inst. Replace the dest with a new reg, and keep + // the inst for remat of other siblings. The inst is saved in + // LiveRangeEdit::DeadRemats and will be deleted after all the + // allocations of the func are done. 
+ if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) { + LiveInterval &NewLI = createEmptyIntervalFrom(Dest); + VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); + NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); + pop_back(); + markDeadRemat(MI); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + MI->substituteRegister(Dest, NewLI.reg, 0, TRI); + MI->getOperand(0).setIsDead(true); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } } // Erase any virtregs that are now empty and unused. There may be <undef> @@ -332,14 +362,15 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { } } -void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - ArrayRef<unsigned> RegsBeingSpilled) { +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead, + ArrayRef<unsigned> RegsBeingSpilled, + AliasAnalysis *AA) { ToShrinkSet ToShrink; for (;;) { // Erase all dead defs. while (!Dead.empty()) - eliminateDeadDef(Dead.pop_back_val(), ToShrink); + eliminateDeadDef(Dead.pop_back_val(), ToShrink, AA); if (ToShrink.empty()) break; diff --git a/contrib/llvm/lib/CodeGen/LiveRangeUtils.h b/contrib/llvm/lib/CodeGen/LiveRangeUtils.h new file mode 100644 index 0000000..bd57609 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRangeUtils.h @@ -0,0 +1,62 @@ +//===-- LiveRangeUtils.h - Live Range modification utilities ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// This file contains helper functions to modify live ranges. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_LIVERANGEUTILS_H +#define LLVM_LIB_CODEGEN_LIVERANGEUTILS_H + +#include "llvm/CodeGen/LiveInterval.h" + +namespace llvm { + +/// Helper function that distributes live range value numbers and the +/// corresponding segments of a master live range \p LR to a list of newly +/// created live ranges \p SplitLRs. \p VNIClasses maps each value number in \p +/// LR to 0 meaning it should stay or to 1..N meaning it should go to a specific +/// live range in the \p SplitLRs array. +template<typename LiveRangeT, typename EqClassesT> +static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[], + EqClassesT VNIClasses) { + // Move segments to new intervals. + typename LiveRangeT::iterator J = LR.begin(), E = LR.end(); + while (J != E && VNIClasses[J->valno->id] == 0) + ++J; + for (typename LiveRangeT::iterator I = J; I != E; ++I) { + if (unsigned eq = VNIClasses[I->valno->id]) { + assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) && + "New intervals should be empty"); + SplitLRs[eq-1]->segments.push_back(*I); + } else + *J++ = *I; + } + LR.segments.erase(J, E); + + // Transfer VNInfos to their new owners and renumber them. 
+ unsigned j = 0, e = LR.getNumValNums(); + while (j != e && VNIClasses[j] == 0) + ++j; + for (unsigned i = j; i != e; ++i) { + VNInfo *VNI = LR.getValNumInfo(i); + if (unsigned eq = VNIClasses[i]) { + VNI->id = SplitLRs[eq-1]->getNumValNums(); + SplitLRs[eq-1]->valnos.push_back(VNI); + } else { + VNI->id = j; + LR.valnos[j++] = VNI; + } + } + LR.valnos.resize(j); +} + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp index 5c9c679..dbf1f96 100644 --- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp @@ -14,14 +14,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include <limits> using namespace llvm; #define DEBUG_TYPE "livestacks" diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index 06b86d8..dd87216 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -64,7 +64,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { return nullptr; } -void LiveVariables::VarInfo::dump() const { +LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) dbgs() << " Alive in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), @@ -129,7 +129,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, } void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, - MachineInstr *MI) { + MachineInstr &MI) { assert(MRI->getVRegDef(reg) && "Register use before def!"); unsigned BBNum = MBB->getNumber(); @@ -140,7 +140,7 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) { // Yes, this register is killed in this basic block already. Increase the // live range by updating the kill instruction. - VRInfo.Kills.back() = MI; + VRInfo.Kills.back() = &MI; return; } @@ -171,7 +171,7 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, // already marked as alive in this basic block, that means it is alive in at // least one of the successor blocks, it's not a kill. if (!VRInfo.AliveBlocks.test(BBNum)) - VRInfo.Kills.push_back(MI); + VRInfo.Kills.push_back(&MI); // Update all dominating blocks to mark them as "known live". for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), @@ -179,12 +179,12 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI); } -void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) { +void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr &MI) { VarInfo &VRInfo = getVarInfo(Reg); if (VRInfo.AliveBlocks.empty()) // If vr is not alive in any block, then defaults to dead. - VRInfo.Kills.push_back(MI); + VRInfo.Kills.push_back(&MI); } /// FindLastPartialDef - Return the last partial def of the specified register. 
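The DistributeRange helper in the new LiveRangeUtils.h above is a stable in-place partition keyed by an equivalence class per value number: class 0 stays in the master range, compacted in place, while class k moves to SplitLRs[k-1]. A standalone sketch of the same partition on plain ints, with classes indexed by position rather than by valno->id:

#include <iostream>
#include <vector>

static void distribute(std::vector<int> &LR,
                       std::vector<std::vector<int>> &Out,
                       const std::vector<unsigned> &Classes) {
  std::size_t J = 0;
  for (std::size_t I = 0; I != LR.size(); ++I) {
    if (unsigned Eq = Classes[I])
      Out[Eq - 1].push_back(LR[I]); // class k goes to split range k-1
    else
      LR[J++] = LR[I]; // class 0 stays, stably compacted
  }
  LR.resize(J);
}

int main() {
  std::vector<int> LR = {10, 11, 12, 13};
  std::vector<std::vector<int>> Out(2);
  distribute(LR, Out, {0, 1, 0, 2});
  // LR == {10, 12}, Out[0] == {11}, Out[1] == {13}
  std::cout << LR[0] << ' ' << LR[1] << ' ' << Out[0][0] << ' ' << Out[1][0]
            << '\n';
}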
@@ -228,7 +228,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, /// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add /// implicit defs to a machine instruction if there was an earlier def of its /// super-register. -void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { +void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr &MI) { MachineInstr *LastDef = PhysRegDef[Reg]; // If there was a previous use or a "full" def all is well. if (!LastDef && !PhysRegUse[Reg]) { @@ -273,7 +273,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { // Remember this use. for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) - PhysRegUse[*SubRegs] = MI; + PhysRegUse[*SubRegs] = &MI; } /// FindLastRefOrPartRef - Return the last reference or partial reference of @@ -483,7 +483,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, Defs.push_back(Reg); // Remember this def. } -void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, +void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs) { while (!Defs.empty()) { unsigned Reg = Defs.back(); @@ -491,21 +491,21 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; - PhysRegDef[SubReg] = MI; + PhysRegDef[SubReg] = &MI; PhysRegUse[SubReg] = nullptr; } } } -void LiveVariables::runOnInstr(MachineInstr *MI, +void LiveVariables::runOnInstr(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs) { - assert(!MI->isDebugValue()); + assert(!MI.isDebugValue()); // Process all of the operands of the instruction... - unsigned NumOperandsToProcess = MI->getNumOperands(); + unsigned NumOperandsToProcess = MI.getNumOperands(); // Unless it is a PHI node. In this case, ONLY process the DEF, not any // of the uses. They will be handled in other basic blocks. - if (MI->isPHI()) + if (MI.isPHI()) NumOperandsToProcess = 1; // Clear kill and dead markers. LV will recompute them. @@ -513,7 +513,7 @@ void LiveVariables::runOnInstr(MachineInstr *MI, SmallVector<unsigned, 4> DefRegs; SmallVector<unsigned, 1> RegMasks; for (unsigned i = 0; i != NumOperandsToProcess; ++i) { - MachineOperand &MO = MI->getOperand(i); + MachineOperand &MO = MI.getOperand(i); if (MO.isRegMask()) { RegMasks.push_back(i); continue; @@ -527,15 +527,18 @@ void LiveVariables::runOnInstr(MachineInstr *MI, MO.setIsKill(false); if (MO.readsReg()) UseRegs.push_back(MOReg); - } else /*MO.isDef()*/ { - if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && - MRI->isReserved(MOReg))) + } else { + assert(MO.isDef()); + // FIXME: We should not remove any dead flags. However the MIPS RDDSP + // instruction needs it at the moment: http://llvm.org/PR27116. + if (TargetRegisterInfo::isPhysicalRegister(MOReg) && + !MRI->isReserved(MOReg)) MO.setIsDead(false); DefRegs.push_back(MOReg); } } - MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock *MBB = MI.getParent(); // Process all uses. for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { unsigned MOReg = UseRegs[i]; @@ -547,7 +550,7 @@ void LiveVariables::runOnInstr(MachineInstr *MI, // Process all masked registers. (Call clobbers). for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) - HandleRegMask(MI->getOperand(RegMasks[i])); + HandleRegMask(MI.getOperand(RegMasks[i])); // Process all defs. 
for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { @@ -555,7 +558,7 @@ void LiveVariables::runOnInstr(MachineInstr *MI, if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); else if (!MRI->isReserved(MOReg)) - HandlePhysRegDef(MOReg, MI, Defs); + HandlePhysRegDef(MOReg, &MI, Defs); } UpdatePhysRegDefs(MI, Defs); } @@ -572,12 +575,10 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { // Loop over all of the instructions, processing them. DistanceMap.clear(); unsigned Dist = 0; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - MachineInstr *MI = I; - if (MI->isDebugValue()) + for (MachineInstr &MI : *MBB) { + if (MI.isDebugValue()) continue; - DistanceMap.insert(std::make_pair(MI, Dist++)); + DistanceMap.insert(std::make_pair(&MI, Dist++)); runOnInstr(MI, Defs); } @@ -679,17 +680,17 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { /// replaceKillInstruction - Update register kill info by replacing a kill /// instruction with a new one. -void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI, - MachineInstr *NewMI) { +void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr &OldMI, + MachineInstr &NewMI) { VarInfo &VI = getVarInfo(Reg); - std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI); + std::replace(VI.Kills.begin(), VI.Kills.end(), &OldMI, &NewMI); } /// removeVirtualRegistersKilled - Remove all killed info for the specified /// instruction. -void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); +void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isKill()) { MO.setIsKill(false); unsigned Reg = MO.getReg(); diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index eb60005..af7392f 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -256,12 +256,12 @@ lookupCandidateBaseReg(unsigned BaseReg, int64_t BaseOffset, int64_t FrameSizeAdjust, int64_t LocalFrameOffset, - const MachineInstr *MI, + const MachineInstr &MI, const TargetRegisterInfo *TRI) { // Check if the relative offset from the where the base register references // to the target address is in range for the instruction. int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset; - return TRI->isFrameOffsetLegal(MI, BaseReg, Offset); + return TRI->isFrameOffsetLegal(&MI, BaseReg, Offset); } bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { @@ -285,16 +285,13 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // choose the first one). SmallVector<FrameRef, 64> FrameReferenceInsns; - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - MachineInstr *MI = I; - + for (MachineBasicBlock &BB : Fn) { + for (MachineInstr &MI : BB) { // Debug value, stackmap and patchpoint instructions can't be out of // range, so they don't need any updates. 
- if (MI->isDebugValue() || - MI->getOpcode() == TargetOpcode::STATEPOINT || - MI->getOpcode() == TargetOpcode::STACKMAP || - MI->getOpcode() == TargetOpcode::PATCHPOINT) + if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STATEPOINT || + MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::PATCHPOINT) continue; // For now, allocate the base register(s) within the basic block @@ -303,19 +300,18 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // than that, but the increased register pressure makes that a // tricky thing to balance. Investigate if re-materializing these // becomes an issue. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { // Consider replacing all frame index operands that reference // an object allocated in the local block. - if (MI->getOperand(i).isFI()) { + if (MI.getOperand(i).isFI()) { // Don't try this with values not in the local block. - if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex())) + if (!MFI->isObjectPreAllocated(MI.getOperand(i).getIndex())) break; - int Idx = MI->getOperand(i).getIndex(); + int Idx = MI.getOperand(i).getIndex(); int64_t LocalOffset = LocalOffsets[Idx]; - if (!TRI->needsFrameBaseReg(MI, LocalOffset)) + if (!TRI->needsFrameBaseReg(&MI, LocalOffset)) break; - FrameReferenceInsns. - push_back(FrameRef(MI, LocalOffset, Idx)); + FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx)); break; } } @@ -333,46 +329,44 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Loop through the frame references and allocate for them as necessary. for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) { FrameRef &FR = FrameReferenceInsns[ref]; - MachineBasicBlock::iterator I = FR.getMachineInstr(); - MachineInstr *MI = I; + MachineInstr &MI = *FR.getMachineInstr(); int64_t LocalOffset = FR.getLocalOffset(); int FrameIdx = FR.getFrameIndex(); assert(MFI->isObjectPreAllocated(FrameIdx) && "Only pre-allocated locals expected!"); - DEBUG(dbgs() << "Considering: " << *MI); + DEBUG(dbgs() << "Considering: " << MI); unsigned idx = 0; - for (unsigned f = MI->getNumOperands(); idx != f; ++idx) { - if (!MI->getOperand(idx).isFI()) + for (unsigned f = MI.getNumOperands(); idx != f; ++idx) { + if (!MI.getOperand(idx).isFI()) continue; - if (FrameIdx == I->getOperand(idx).getIndex()) + if (FrameIdx == MI.getOperand(idx).getIndex()) break; } - assert(idx < MI->getNumOperands() && "Cannot find FI operand"); + assert(idx < MI.getNumOperands() && "Cannot find FI operand"); int64_t Offset = 0; int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0; - DEBUG(dbgs() << " Replacing FI in: " << *MI); + DEBUG(dbgs() << " Replacing FI in: " << MI); // If we have a suitable base register available, use it; otherwise // create a new one. Note that any offset encoded in the // instruction itself will be taken into account by the target, // so we don't have to adjust for it here when reusing a base // register. - if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset, - FrameSizeAdjust, LocalOffset, MI, - TRI)) { + if (UsedBaseReg && + lookupCandidateBaseReg(BaseReg, BaseOffset, FrameSizeAdjust, + LocalOffset, MI, TRI)) { DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); // We found a register to reuse. Offset = FrameSizeAdjust + LocalOffset - BaseOffset; } else { - // No previously defined register was in range, so create a // new one. 
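The base-register reuse test above comes down to simple offset arithmetic: the instruction must be able to encode FrameSizeAdjust + LocalOffset - BaseOffset relative to the existing base register. A worked example with hypothetical frame numbers:

#include <cstdint>
#include <iostream>

int main() {
  // Stack grows down and the local block is 128 bytes.
  int64_t FrameSizeAdjust = 128;
  int64_t BaseOffset = 64;   // where the existing base register points
  int64_t LocalOffset = -40; // the object's offset within the local block
  // Offset the instruction must encode if it reuses the base register:
  int64_t Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
  std::cout << Offset << '\n'; // 24; reused only if the target reports this
                               // offset as legal (isFrameOffsetLegal above)
}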
- - int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); + // No previously defined register was in range, so create a new one. + int64_t InstrOffset = TRI->getFrameIndexInstrOffset(&MI, idx); int64_t PrevBaseOffset = BaseOffset; BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset; @@ -386,12 +380,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { !lookupCandidateBaseReg( BaseReg, BaseOffset, FrameSizeAdjust, FrameReferenceInsns[ref + 1].getLocalOffset(), - FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) { + *FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) { BaseOffset = PrevBaseOffset; continue; } - const MachineFunction *MF = MI->getParent()->getParent(); + const MachineFunction *MF = MI.getParent()->getParent(); const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); BaseReg = Fn.getRegInfo().createVirtualRegister(RC); @@ -416,8 +410,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Modify the instruction to use the new base register rather // than the frame index operand. - TRI->resolveFrameIndex(*I, BaseReg, Offset); - DEBUG(dbgs() << "Resolved: " << *MI); + TRI->resolveFrameIndex(MI, BaseReg, Offset); + DEBUG(dbgs() << "Resolved: " << MI); ++NumReplacements; } diff --git a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp new file mode 100644 index 0000000..6966c8c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -0,0 +1,162 @@ +//===- LowerEmuTLS.cpp - Add __emutls_[vt].* variables --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is required for targets depending on libgcc style +// emulated thread local storage variables. For every defined TLS variable xyz, +// an __emutls_v.xyz is generated. If there is non-zero initialized value +// an __emutls_t.xyz is also generated. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +#define DEBUG_TYPE "loweremutls" + +namespace { + +class LowerEmuTLS : public ModulePass { + const TargetMachine *TM; +public: + static char ID; // Pass identification, replacement for typeid + explicit LowerEmuTLS() : ModulePass(ID), TM(nullptr) { } + explicit LowerEmuTLS(const TargetMachine *TM) + : ModulePass(ID), TM(TM) { + initializeLowerEmuTLSPass(*PassRegistry::getPassRegistry()); + } + bool runOnModule(Module &M) override; +private: + bool addEmuTlsVar(Module &M, const GlobalVariable *GV); + static void copyLinkageVisibility(Module &M, + const GlobalVariable *from, + GlobalVariable *to) { + to->setLinkage(from->getLinkage()); + to->setVisibility(from->getVisibility()); + if (from->hasComdat()) { + to->setComdat(M.getOrInsertComdat(to->getName())); + to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind()); + } + } +}; +} + +char LowerEmuTLS::ID = 0; + +INITIALIZE_PASS(LowerEmuTLS, "loweremutls", + "Add __emutls_[vt]. 
variables for emulated TLS model",
+ false, false)
+
+ModulePass *llvm::createLowerEmuTLSPass(const TargetMachine *TM) {
+ return new LowerEmuTLS(TM);
+}
+
+bool LowerEmuTLS::runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ if (!TM || !TM->Options.EmulatedTLS)
+ return false;
+
+ bool Changed = false;
+ SmallVector<const GlobalVariable*, 8> TlsVars;
+ for (const auto &G : M.globals()) {
+ if (G.isThreadLocal())
+ TlsVars.append({&G});
+ }
+ for (const auto G : TlsVars)
+ Changed |= addEmuTlsVar(M, G);
+ return Changed;
+}
+
+bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
+ LLVMContext &C = M.getContext();
+ PointerType *VoidPtrType = Type::getInt8PtrTy(C);
+
+ std::string EmuTlsVarName = ("__emutls_v." + GV->getName()).str();
+ GlobalVariable *EmuTlsVar = M.getNamedGlobal(EmuTlsVarName);
+ if (EmuTlsVar)
+ return false; // It has been added before.
+
+ const DataLayout &DL = M.getDataLayout();
+ Constant *NullPtr = ConstantPointerNull::get(VoidPtrType);
+
+ // Get non-zero initializer from GV's initializer.
+ const Constant *InitValue = nullptr;
+ if (GV->hasInitializer()) {
+ InitValue = GV->getInitializer();
+ const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue);
+ // When GV's init value is all 0, omit the EmuTlsTmplVar and let
+ // the emutls library function reset newly allocated TLS variables.
+ if (isa<ConstantAggregateZero>(InitValue) ||
+ (InitIntValue && InitIntValue->isZero()))
+ InitValue = nullptr;
+ }
+
+ // Create the __emutls_v. symbol, whose type has 4 fields:
+ // word size; // size of GV in bytes
+ // word align; // alignment of GV
+ // void *ptr; // initialized to 0; set at run time per thread.
+ // void *templ; // 0 or points to __emutls_t.*
+ // sizeof(word) should be the same as sizeof(void*) on the target.
+ IntegerType *WordType = DL.getIntPtrType(C);
+ PointerType *InitPtrType = InitValue ?
+ PointerType::getUnqual(InitValue->getType()) : VoidPtrType;
+ Type *ElementTypes[4] = {WordType, WordType, VoidPtrType, InitPtrType};
+ ArrayRef<Type*> ElementTypeArray(ElementTypes, 4);
+ StructType *EmuTlsVarType = StructType::create(ElementTypeArray);
+ EmuTlsVar = cast<GlobalVariable>(
+ M.getOrInsertGlobal(EmuTlsVarName, EmuTlsVarType));
+ copyLinkageVisibility(M, GV, EmuTlsVar);
+
+ // Define "__emutls_t.*" and "__emutls_v.*" only if GV is defined.
+ if (!GV->hasInitializer())
+ return true;
+
+ Type *GVType = GV->getValueType();
+ unsigned GVAlignment = GV->getAlignment();
+ if (!GVAlignment) {
+ // When LLVM IR declares a variable without alignment, use
+ // the ABI default alignment for the type.
+ GVAlignment = DL.getABITypeAlignment(GVType);
+ }
+
+ // Define "__emutls_t.*" if there is an InitValue.
+ GlobalVariable *EmuTlsTmplVar = nullptr;
+ if (InitValue) {
+ std::string EmuTlsTmplName = ("__emutls_t." + GV->getName()).str();
+ EmuTlsTmplVar = dyn_cast_or_null<GlobalVariable>(
+ M.getOrInsertGlobal(EmuTlsTmplName, GVType));
+ assert(EmuTlsTmplVar && "Failed to create emulated TLS initializer");
+ EmuTlsTmplVar->setConstant(true);
+ EmuTlsTmplVar->setInitializer(const_cast<Constant*>(InitValue));
+ EmuTlsTmplVar->setAlignment(GVAlignment);
+ copyLinkageVisibility(M, GV, EmuTlsTmplVar);
+ }
+
+ // Define "__emutls_v.*" with initializer and alignment.
+ Constant *ElementValues[4] = {
+ ConstantInt::get(WordType, DL.getTypeStoreSize(GVType)),
+ ConstantInt::get(WordType, GVAlignment),
+ NullPtr, EmuTlsTmplVar ?
EmuTlsTmplVar : NullPtr + }; + ArrayRef<Constant*> ElementValueArray(ElementValues, 4); + EmuTlsVar->setInitializer( + ConstantStruct::get(EmuTlsVarType, ElementValueArray)); + unsigned MaxAlignment = std::max( + DL.getABITypeAlignment(WordType), + DL.getABITypeAlignment(VoidPtrType)); + EmuTlsVar->setAlignment(MaxAlignment); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 28f9d4e..6e3de52 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MILexer.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" @@ -21,6 +22,9 @@ using namespace llvm; namespace { +typedef function_ref<void(StringRef::iterator Loc, const Twine &)> + ErrorCallbackType; + /// This class provides a way to iterate and get characters from the source /// string. class Cursor { @@ -133,9 +137,7 @@ static std::string unescapeQuotedString(StringRef Value) { } /// Lex a string constant using the following regular expression: \"[^\"]*\" -static Cursor lexStringConstant( - Cursor C, - function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { +static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { assert(C.peek() == '"'); for (C.advance(); C.peek() != '"'; C.advance()) { if (C.isEOF() || isNewlineChar(C.peek())) { @@ -149,9 +151,8 @@ static Cursor lexStringConstant( return C; } -static Cursor lexName( - Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength, - function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { +static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, + unsigned PrefixLength, ErrorCallbackType ErrorCallback) { auto Range = C; C.advance(PrefixLength); if (C.peek() == '"') { @@ -241,9 +242,8 @@ static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { return C; } -static Cursor maybeLexMachineBasicBlock( - Cursor C, MIToken &Token, - function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { +static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { bool IsReference = C.remaining().startswith("%bb."); if (!IsReference && !C.remaining().startswith("bb.")) return None; @@ -326,9 +326,17 @@ static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); } -static Cursor maybeLexIRBlock( - Cursor C, MIToken &Token, - function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { +static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + const StringRef Rule = "%subreg."; + if (!C.remaining().startswith(Rule)) + return None; + return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(), + ErrorCallback); +} + +static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { const StringRef Rule = "%ir-block."; if (!C.remaining().startswith(Rule)) return None; @@ -337,9 +345,8 @@ static Cursor maybeLexIRBlock( return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); } -static Cursor maybeLexIRValue( - Cursor C, MIToken &Token, - function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { +static Cursor maybeLexIRValue(Cursor C, MIToken 
&Token, + ErrorCallbackType ErrorCallback) { const StringRef Rule = "%ir."; if (!C.remaining().startswith(Rule)) return None; @@ -373,9 +380,8 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) { return C; } -static Cursor maybeLexGlobalValue( - Cursor C, MIToken &Token, - function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { +static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { if (C.peek() != '@') return None; if (!isdigit(C.peek(1))) @@ -391,9 +397,8 @@ static Cursor maybeLexGlobalValue( return C; } -static Cursor maybeLexExternalSymbol( - Cursor C, MIToken &Token, - function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { +static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { if (C.peek() != '$') return None; return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, @@ -456,9 +461,8 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { .Default(MIToken::Error); } -static Cursor maybeLexExlaim( - Cursor C, MIToken &Token, - function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { +static Cursor maybeLexExlaim(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { if (C.peek() != '!') return None; auto Range = C; @@ -497,6 +501,10 @@ static MIToken::TokenKind symbolToken(char C) { return MIToken::plus; case '-': return MIToken::minus; + case '<': + return MIToken::less; + case '>': + return MIToken::greater; default: return MIToken::Error; } @@ -527,9 +535,8 @@ static Cursor maybeLexNewline(Cursor C, MIToken &Token) { return C; } -static Cursor maybeLexEscapedIRValue( - Cursor C, MIToken &Token, - function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { +static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { if (C.peek() != '`') return None; auto Range = C; @@ -551,9 +558,8 @@ static Cursor maybeLexEscapedIRValue( return C; } -StringRef llvm::lexMIToken( - StringRef Source, MIToken &Token, - function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { +StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, + ErrorCallbackType ErrorCallback) { auto C = skipComment(skipWhitespace(Cursor(Source))); if (C.isEOF()) { Token.reset(MIToken::Eof, C.remaining()); @@ -574,6 +580,8 @@ StringRef llvm::lexMIToken( return R.remaining(); if (Cursor R = maybeLexConstantPoolItem(C, Token)) return R.remaining(); + if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) + return R.remaining(); if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) return R.remaining(); if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h index ff54aa3..32fc8ab 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -45,6 +45,8 @@ struct MIToken { rbrace, plus, minus, + less, + greater, // Keywords kw_implicit, @@ -116,7 +118,8 @@ struct MIToken { IRBlock, NamedIRValue, IRValue, - QuotedIRValue // `<constant value>` + QuotedIRValue, // `<constant value>` + SubRegisterIndex }; private: diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp index f2f6584..b3fd16f 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -17,24 +17,30 @@ #include 
"llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/Instructions.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/ValueSymbolTable.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF, + SourceMgr &SM, const SlotMapping &IRSlots) + : MF(MF), SM(&SM), IRSlots(IRSlots) { +} + namespace { /// A wrapper struct around the 'MachineOperand' struct that includes a source @@ -55,14 +61,11 @@ struct ParsedMachineOperand { }; class MIParser { - SourceMgr &SM; MachineFunction &MF; SMDiagnostic &Error; StringRef Source, CurrentSource; MIToken Token; const PerFunctionMIParsingState &PFS; - /// Maps from indices to unnamed global values and metadata nodes. - const SlotMapping &IRSlots; /// Maps from instruction names to op codes. StringMap<unsigned> Names2InstrOpCodes; /// Maps from register names to registers. @@ -83,11 +86,12 @@ class MIParser { StringMap<unsigned> Names2BitmaskTargetFlags; public: - MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, - StringRef Source, const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots); + MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error, + StringRef Source); - void lex(); + /// \p SkipChar gives the number of characters to skip before looking + /// for the next token. + void lex(unsigned SkipChar = 0); /// Report an error at the current location with the given message. /// @@ -119,12 +123,17 @@ public: bool parseRegisterFlag(unsigned &Flags); bool parseSubRegisterIndex(unsigned &SubReg); bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx); + bool parseSize(unsigned &Size); bool parseRegisterOperand(MachineOperand &Dest, Optional<unsigned> &TiedDefIdx, bool IsDef = false); bool parseImmediateOperand(MachineOperand &Dest); bool parseIRConstant(StringRef::iterator Loc, StringRef Source, const Constant *&C); bool parseIRConstant(StringRef::iterator Loc, const Constant *&C); + bool parseIRType(StringRef::iterator Loc, StringRef Source, unsigned &Read, + Type *&Ty); + // \p MustBeSized defines whether or not \p Ty must be sized. 
+ bool parseIRType(StringRef::iterator Loc, Type *&Ty, bool MustBeSized = true); bool parseTypedImmediateOperand(MachineOperand &Dest); bool parseFPImmediateOperand(MachineOperand &Dest); bool parseMBBReference(MachineBasicBlock *&MBB); @@ -136,6 +145,7 @@ public: bool parseGlobalValue(GlobalValue *&GV); bool parseGlobalAddressOperand(MachineOperand &Dest); bool parseConstantPoolIndexOperand(MachineOperand &Dest); + bool parseSubRegisterIndexOperand(MachineOperand &Dest); bool parseJumpTableIndexOperand(MachineOperand &Dest); bool parseExternalSymbolOperand(MachineOperand &Dest); bool parseMDNode(MDNode *&Node); @@ -155,7 +165,7 @@ public: bool parseAlignment(unsigned &Alignment); bool parseOperandsOffset(MachineOperand &Op); bool parseIRValue(const Value *&V); - bool parseMemoryOperandFlag(unsigned &Flags); + bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV); bool parseMachinePointerInfo(MachinePointerInfo &Dest); bool parseMachineMemoryOperand(MachineMemOperand *&Dest); @@ -244,21 +254,21 @@ private: } // end anonymous namespace -MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, - StringRef Source, const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots) - : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), - PFS(PFS), IRSlots(IRSlots) {} +MIParser::MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error, + StringRef Source) + : MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS) +{} -void MIParser::lex() { +void MIParser::lex(unsigned SkipChar) { CurrentSource = lexMIToken( - CurrentSource, Token, + CurrentSource.data() + SkipChar, Token, [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); }); } bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { + const SourceMgr &SM = *PFS.SM; assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID()); if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) { @@ -587,6 +597,14 @@ bool MIParser::parse(MachineInstr *&MI) { if (Token.isError() || parseInstruction(OpCode, Flags)) return true; + Type *Ty = nullptr; + if (isPreISelGenericOpcode(OpCode)) { + // For generic opcode, a type is mandatory. + auto Loc = Token.location(); + if (parseIRType(Loc, Ty)) + return true; + } + // Parse the remaining machine operands. while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) && Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { @@ -642,6 +660,8 @@ bool MIParser::parse(MachineInstr *&MI) { // TODO: Check for extraneous machine operands. 
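// (Editorial aside: an illustrative sketch, not part of the patch. The hunks
// above make a type token mandatory after a pre-ISel generic opcode and let
// a generic vreg definition carry a size in parentheses; the exact MIR
// spelling below is an assumption inferred from this parser code:
//   %2(32) = G_ADD i32 %0, %1
// On the C++ side that corresponds roughly to:
//   unsigned Reg = MRI.createGenericVirtualRegister(/*Size=*/1);
//   MRI.setSize(Reg, 32);  // size recorded when the def is parsed
//   MI->setType(Ty);       // Ty produced by parseIRType.)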
MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true); MI->setFlags(Flags); + if (Ty) + MI->setType(Ty); for (const auto &Operand : Operands) MI->addOperand(MF, Operand.Operand); if (assignRegisterTies(*MI, Operands)) @@ -876,6 +896,17 @@ bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) { return false; } +bool MIParser::parseSize(unsigned &Size) { + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal for the size"); + if (getUnsigned(Size)) + return true; + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + return false; +} + bool MIParser::assignRegisterTies(MachineInstr &MI, ArrayRef<ParsedMachineOperand> Operands) { SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs; @@ -931,12 +962,31 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, if (Token.is(MIToken::colon)) { if (parseSubRegisterIndex(SubReg)) return true; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return error("subregister index expects a virtual register"); } - if ((Flags & RegState::Define) == 0 && consumeIfPresent(MIToken::lparen)) { - unsigned Idx; - if (parseRegisterTiedDefIndex(Idx)) + if ((Flags & RegState::Define) == 0) { + if (consumeIfPresent(MIToken::lparen)) { + unsigned Idx; + if (parseRegisterTiedDefIndex(Idx)) + return true; + TiedDefIdx = Idx; + } + } else if (consumeIfPresent(MIToken::lparen)) { + // Virtual registers may have a size with GlobalISel. + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return error("unexpected size on physical register"); + unsigned Size; + if (parseSize(Size)) return true; - TiedDefIdx = Idx; + + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.setSize(Reg, Size); + } else if (PFS.GenericVRegs.count(Reg)) { + // Generic virtual registers must have a size. + // If we end up here this means the size hasn't been specified and + // this is bad! + return error("generic virtual registers must have a size"); } Dest = MachineOperand::CreateReg( Reg, Flags & RegState::Define, Flags & RegState::Implicit, @@ -961,7 +1011,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, auto Source = StringValue.str(); // The source has to be null terminated. SMDiagnostic Err; C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(), - &IRSlots); + &PFS.IRSlots); if (!C) return error(Loc + Err.getColumnNo(), Err.getMessage()); return false; @@ -974,6 +1024,38 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { return false; } +bool MIParser::parseIRType(StringRef::iterator Loc, StringRef StringValue, + unsigned &Read, Type *&Ty) { + auto Source = StringValue.str(); // The source has to be null terminated. + SMDiagnostic Err; + Ty = parseTypeAtBeginning(Source.c_str(), Read, Err, + *MF.getFunction()->getParent(), &PFS.IRSlots); + if (!Ty) + return error(Loc + Err.getColumnNo(), Err.getMessage()); + return false; +} + +bool MIParser::parseIRType(StringRef::iterator Loc, Type *&Ty, + bool MustBeSized) { + // At this point we enter in the IR world, i.e., to get the correct type, + // we need to hand off the whole string, not just the current token. + // E.g., <4 x i64> would give '<' as a token and there is not much + // the IR parser can do with that. + unsigned Read = 0; + if (parseIRType(Loc, StringRef(Loc), Read, Ty)) + return true; + // The type must be sized, otherwise there is not much the backend + // can do with it. 
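// (Editorial aside, illustrative only: the handoff matters for types that
// span several MI tokens. For "<4 x i64>" the MI lexer returns '<' as a lone
// token, so the whole remaining string is handed to parseTypeAtBeginning,
// which reports in Read how many characters the IR parser actually consumed;
// the arithmetic below resynchronizes the MI lexer with that position.)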
+ if (MustBeSized && !Ty->isSized()) + return error("expected a sized type"); + // The next token starts Read characters after Loc. + // However, the current location is not Loc, but Loc + the length of Token. + // Therefore, subtract the length of Token (range().end() - Loc) from the + // number of characters to skip before the next token. + lex(Read - (Token.range().end() - Loc)); + return false; +} + bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) { assert(Token.is(MIToken::IntegerType)); auto Loc = Token.location(); @@ -1100,10 +1182,10 @@ bool MIParser::parseGlobalValue(GlobalValue *&GV) { unsigned GVIdx; if (getUnsigned(GVIdx)) return true; - if (GVIdx >= IRSlots.GlobalValues.size()) + if (GVIdx >= PFS.IRSlots.GlobalValues.size()) return error(Twine("use of undefined global value '@") + Twine(GVIdx) + "'"); - GV = IRSlots.GlobalValues[GVIdx]; + GV = PFS.IRSlots.GlobalValues[GVIdx]; break; } default: @@ -1161,6 +1243,17 @@ bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) { return false; } +bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::SubRegisterIndex)); + StringRef Name = Token.stringValue(); + unsigned SubRegIndex = getSubRegIndex(Token.stringValue()); + if (SubRegIndex == 0) + return error(Twine("unknown subregister index '") + Name + "'"); + lex(); + Dest = MachineOperand::CreateImm(SubRegIndex); + return false; +} + bool MIParser::parseMDNode(MDNode *&Node) { assert(Token.is(MIToken::exclaim)); auto Loc = Token.location(); @@ -1170,8 +1263,8 @@ bool MIParser::parseMDNode(MDNode *&Node) { unsigned ID; if (getUnsigned(ID)) return true; - auto NodeInfo = IRSlots.MetadataNodes.find(ID); - if (NodeInfo == IRSlots.MetadataNodes.end()) + auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID); + if (NodeInfo == PFS.IRSlots.MetadataNodes.end()) return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'"); lex(); Node = NodeInfo->second.get(); @@ -1406,6 +1499,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, return parseJumpTableIndexOperand(Dest); case MIToken::ExternalSymbol: return parseExternalSymbolOperand(Dest); + case MIToken::SubRegisterIndex: + return parseSubRegisterIndexOperand(Dest); case MIToken::exclaim: return parseMetadataOperand(Dest); case MIToken::kw_cfi_same_value: @@ -1559,8 +1654,8 @@ bool MIParser::getUint64(uint64_t &Result) { return false; } -bool MIParser::parseMemoryOperandFlag(unsigned &Flags) { - const unsigned OldFlags = Flags; +bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { + const auto OldFlags = Flags; switch (Token.kind()) { case MIToken::kw_volatile: Flags |= MachineMemOperand::MOVolatile; @@ -1605,6 +1700,14 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { // The token was already consumed, so use return here instead of break. return false; } + case MIToken::StackObject: { + int FI; + if (parseStackFrameIndex(FI)) + return true; + PSV = MF.getPSVManager().getFixedStack(FI); + // The token was already consumed, so use return here instead of break.
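// (Editorial aside, illustrative only: with the StackObject case above, a
// machine memory operand such as ":: (store 4 into %stack.0)" can now name
// an ordinary stack slot, resolved through the PseudoSourceValue manager
// like the existing %fixed-stack references; the operand spelling is an
// assumption from this era's MIR syntax.)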
+ return false; + } case MIToken::kw_call_entry: { lex(); switch (Token.kind()) { @@ -1636,7 +1739,8 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) || Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) || - Token.is(MIToken::FixedStackObject) || Token.is(MIToken::kw_call_entry)) { + Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) || + Token.is(MIToken::kw_call_entry)) { const PseudoSourceValue *PSV = nullptr; if (parseMemoryPseudoSourceValue(PSV)) return true; @@ -1667,7 +1771,7 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (expectAndConsume(MIToken::lparen)) return true; - unsigned Flags = 0; + MachineMemOperand::Flags Flags = MachineMemOperand::MONone; while (Token.isMemoryOperandFlag()) { if (parseMemoryOperandFlag(Flags)) return true; @@ -1688,14 +1792,16 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { return true; lex(); - const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into"; - if (Token.isNot(MIToken::Identifier) || Token.stringValue() != Word) - return error(Twine("expected '") + Word + "'"); - lex(); - MachinePointerInfo Ptr = MachinePointerInfo(); - if (parseMachinePointerInfo(Ptr)) - return true; + if (Token.is(MIToken::Identifier)) { + const char *Word = Flags & MachineMemOperand::MOLoad ? "from" : "into"; + if (Token.stringValue() != Word) + return error(Twine("expected '") + Word + "'"); + lex(); + + if (parseMachinePointerInfo(Ptr)) + return true; + } unsigned BaseAlignment = Size; AAMDNodes AAInfo; MDNode *Range = nullptr; @@ -1947,65 +2053,42 @@ bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) { return false; } -bool llvm::parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src, - PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, +bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, + StringRef Src, SMDiagnostic &Error) { - SourceMgr SM; - SM.AddNewSourceBuffer( - MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false), - SMLoc()); - return MIParser(SM, MF, Error, Src, PFS, IRSlots) - .parseBasicBlockDefinitions(PFS.MBBSlots); -} - -bool llvm::parseMachineInstructions(MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, - SMDiagnostic &Error) { - SourceMgr SM; - SM.AddNewSourceBuffer( - MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false), - SMLoc()); - return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseBasicBlocks(); -} - -bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, - MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMBB(MBB); -} - -bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, - MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, + return MIParser(PFS, Error, Src).parseBasicBlockDefinitions(PFS.MBBSlots); +} + +bool llvm::parseMachineInstructions(const PerFunctionMIParsingState &PFS, + StringRef Src, SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseBasicBlocks(); +} + +bool llvm::parseMBBReference(const 
PerFunctionMIParsingState &PFS, + MachineBasicBlock *&MBB, StringRef Src, + SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseStandaloneMBB(MBB); +} + +bool llvm::parseNamedRegisterReference(const PerFunctionMIParsingState &PFS, + unsigned &Reg, StringRef Src, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, PFS, IRSlots) - .parseStandaloneNamedRegister(Reg); + return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg); } -bool llvm::parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM, - MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, +bool llvm::parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS, + unsigned &Reg, StringRef Src, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, PFS, IRSlots) - .parseStandaloneVirtualRegister(Reg); + return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Reg); } -bool llvm::parseStackObjectReference(int &FI, SourceMgr &SM, - MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, +bool llvm::parseStackObjectReference(const PerFunctionMIParsingState &PFS, + int &FI, StringRef Src, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, PFS, IRSlots) - .parseStandaloneStackObject(FI); + return MIParser(PFS, Error, Src).parseStandaloneStackObject(FI); } -bool llvm::parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF, - StringRef Src, const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMDNode(Node); +bool llvm::parseMDNode(const PerFunctionMIParsingState &PFS, + MDNode *&Node, StringRef Src, SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node); } diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h index 8aef704..18895b9 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h @@ -15,26 +15,37 @@ #define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/SmallSet.h" namespace llvm { +class StringRef; class BasicBlock; class MachineBasicBlock; -class MachineInstr; class MachineFunction; +class MachineInstr; +class MachineRegisterInfo; class MDNode; struct SlotMapping; class SMDiagnostic; class SourceMgr; struct PerFunctionMIParsingState { + MachineFunction &MF; + SourceMgr *SM; + const SlotMapping &IRSlots; + DenseMap<unsigned, MachineBasicBlock *> MBBSlots; DenseMap<unsigned, unsigned> VirtualRegisterSlots; DenseMap<unsigned, int> FixedStackObjectSlots; DenseMap<unsigned, int> StackObjectSlots; DenseMap<unsigned, unsigned> ConstantPoolSlots; DenseMap<unsigned, unsigned> JumpTableSlots; + /// Hold the generic virtual registers. + SmallSet<unsigned, 8> GenericVRegs; + + PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM, + const SlotMapping &IRSlots); }; /// Parse the machine basic block definitions, and skip the machine @@ -49,10 +60,8 @@ struct PerFunctionMIParsingState { /// resolve the machine basic block references. /// /// Return true if an error occurred. 
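/// (Editorial aside: a hedged usage sketch, not taken from the patch itself.
/// After this refactoring a caller builds the per-function state once and
/// threads it through every entry point, e.g.
///   PerFunctionMIParsingState PFS(MF, SM, IRSlots);
///   if (parseMachineBasicBlockDefinitions(PFS, Src, Error))
///     /* report Error */;
/// which is exactly how MIRParserImpl::initializeMachineFunction uses it.)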
-bool parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src, - PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, - SMDiagnostic &Error); +bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, + StringRef Src, SMDiagnostic &Error); /// Parse the machine instructions. /// @@ -64,35 +73,26 @@ bool parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src, /// on the given source string. /// /// Return true if an error occurred. -bool parseMachineInstructions(MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, SMDiagnostic &Error); - -bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, - MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, SMDiagnostic &Error); - -bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, - MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, +bool parseMachineInstructions(const PerFunctionMIParsingState &PFS, + StringRef Src, SMDiagnostic &Error); + +bool parseMBBReference(const PerFunctionMIParsingState &PFS, + MachineBasicBlock *&MBB, StringRef Src, + SMDiagnostic &Error); + +bool parseNamedRegisterReference(const PerFunctionMIParsingState &PFS, + unsigned &Reg, StringRef Src, SMDiagnostic &Error); -bool parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM, - MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, +bool parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS, + unsigned &Reg, StringRef Src, SMDiagnostic &Error); -bool parseStackObjectReference(int &FI, SourceMgr &SM, MachineFunction &MF, - StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, SMDiagnostic &Error); +bool parseStackObjectReference(const PerFunctionMIParsingState &PFS, + int &FI, StringRef Src, SMDiagnostic &Error); -bool parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF, - StringRef Src, const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, SMDiagnostic &Error); +bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node, + StringRef Src, SMDiagnostic &Error); } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 422efbc..4aa3df6 100644 --- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -15,27 +15,30 @@ #include "llvm/CodeGen/MIRParser/MIRParser.h" #include "MIParser.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include 
"llvm/IR/Module.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/YAMLTraits.h" #include <memory> @@ -53,6 +56,8 @@ class MIRParserImpl { SlotMapping IRSlots; /// Maps from register class names to register classes. StringMap<const TargetRegisterClass *> Names2RegClasses; + /// Maps from register bank names to register banks. + StringMap<const RegisterBank *> Names2RegBanks; public: MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, @@ -97,44 +102,38 @@ public: /// Return true if error occurred. bool initializeMachineFunction(MachineFunction &MF); - bool initializeRegisterInfo(MachineFunction &MF, - const yaml::MachineFunction &YamlMF, - PerFunctionMIParsingState &PFS); + bool initializeRegisterInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF); - void inferRegisterInfo(MachineFunction &MF, + void inferRegisterInfo(const PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF); - bool initializeFrameInfo(MachineFunction &MF, - const yaml::MachineFunction &YamlMF, - PerFunctionMIParsingState &PFS); + bool initializeFrameInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF); - bool parseCalleeSavedRegister(MachineFunction &MF, - PerFunctionMIParsingState &PFS, + bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS, std::vector<CalleeSavedInfo> &CSIInfo, const yaml::StringValue &RegisterSource, int FrameIdx); - bool parseStackObjectsDebugInfo(MachineFunction &MF, - PerFunctionMIParsingState &PFS, + bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, const yaml::MachineStackObject &Object, int FrameIdx); - bool initializeConstantPool(MachineConstantPool &ConstantPool, - const yaml::MachineFunction &YamlMF, - const MachineFunction &MF, - DenseMap<unsigned, unsigned> &ConstantPoolSlots); + bool initializeConstantPool(PerFunctionMIParsingState &PFS, + MachineConstantPool &ConstantPool, + const yaml::MachineFunction &YamlMF); - bool initializeJumpTableInfo(MachineFunction &MF, - const yaml::MachineJumpTable &YamlJTI, - PerFunctionMIParsingState &PFS); + bool initializeJumpTableInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineJumpTable &YamlJTI); private: - bool parseMDNode(MDNode *&Node, const yaml::StringValue &Source, - MachineFunction &MF, const PerFunctionMIParsingState &PFS); + bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node, + const yaml::StringValue &Source); - bool parseMBBReference(MachineBasicBlock *&MBB, - const yaml::StringValue &Source, MachineFunction &MF, - const PerFunctionMIParsingState &PFS); + bool parseMBBReference(const PerFunctionMIParsingState &PFS, + MachineBasicBlock *&MBB, + const yaml::StringValue &Source); /// Return a MIR diagnostic converted from an MI string diagnostic. SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error, @@ -149,12 +148,18 @@ private: void createDummyFunction(StringRef Name, Module &M); void initNames2RegClasses(const MachineFunction &MF); + void initNames2RegBanks(const MachineFunction &MF); /// Check if the given identifier is a name of a register class. /// /// Return null if the name isn't a register class. const TargetRegisterClass *getRegClass(const MachineFunction &MF, StringRef Name); + + /// Check if the given identifier is a name of a register bank. 
+ /// + /// Return null if the name isn't a register bank. + const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name); }; } // end namespace llvm @@ -226,7 +231,7 @@ std::unique_ptr<Module> MIRParserImpl::parse() { Context, &IRSlots); if (!M) { reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange())); - return M; + return nullptr; } In.nextDocument(); if (!In.setCurrentDocument()) @@ -285,46 +290,60 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasInlineAsm(YamlMF.HasInlineAsm); - PerFunctionMIParsingState PFS; - if (initializeRegisterInfo(MF, YamlMF, PFS)) + if (YamlMF.AllVRegsAllocated) + MF.getProperties().set(MachineFunctionProperties::Property::AllVRegsAllocated); + PerFunctionMIParsingState PFS(MF, SM, IRSlots); + if (initializeRegisterInfo(PFS, YamlMF)) return true; if (!YamlMF.Constants.empty()) { auto *ConstantPool = MF.getConstantPool(); assert(ConstantPool && "Constant pool must be created"); - if (initializeConstantPool(*ConstantPool, YamlMF, MF, - PFS.ConstantPoolSlots)) + if (initializeConstantPool(PFS, *ConstantPool, YamlMF)) return true; } + StringRef BlockStr = YamlMF.Body.Value.Value; SMDiagnostic Error; - if (parseMachineBasicBlockDefinitions(MF, YamlMF.Body.Value.Value, PFS, - IRSlots, Error)) { + SourceMgr BlockSM; + BlockSM.AddNewSourceBuffer( + MemoryBuffer::getMemBuffer(BlockStr, "",/*RequiresNullTerminator=*/false), + SMLoc()); + PFS.SM = &BlockSM; + if (parseMachineBasicBlockDefinitions(PFS, BlockStr, Error)) { reportDiagnostic( diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); return true; } + PFS.SM = &SM; if (MF.empty()) return error(Twine("machine function '") + Twine(MF.getName()) + "' requires at least one machine basic block in its body"); // Initialize the frame information after creating all the MBBs so that the // MBB references in the frame information can be resolved. - if (initializeFrameInfo(MF, YamlMF, PFS)) + if (initializeFrameInfo(PFS, YamlMF)) return true; // Initialize the jump table after creating all the MBBs so that the MBB // references can be resolved. if (!YamlMF.JumpTableInfo.Entries.empty() && - initializeJumpTableInfo(MF, YamlMF.JumpTableInfo, PFS)) + initializeJumpTableInfo(PFS, YamlMF.JumpTableInfo)) return true; // Parse the machine instructions after creating all of the MBBs so that the // parser can resolve the MBB references. - if (parseMachineInstructions(MF, YamlMF.Body.Value.Value, PFS, IRSlots, - Error)) { + StringRef InsnStr = YamlMF.Body.Value.Value; + SourceMgr InsnSM; + InsnSM.AddNewSourceBuffer( + MemoryBuffer::getMemBuffer(InsnStr, "", /*RequiresNullTerminator=*/false), + SMLoc()); + PFS.SM = &InsnSM; + if (parseMachineInstructions(PFS, InsnStr, Error)) { reportDiagnostic( diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); return true; } - inferRegisterInfo(MF, YamlMF); + PFS.SM = &SM; + + inferRegisterInfo(PFS, YamlMF); // FIXME: This is a temporary workaround until the reserved registers can be // serialized. 
MF.getRegInfo().freezeReservedRegs(MF); @@ -332,9 +351,9 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { return false; } -bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF, - const yaml::MachineFunction &YamlMF, - PerFunctionMIParsingState &PFS) { +bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF) { + MachineFunction &MF = PFS.MF; MachineRegisterInfo &RegInfo = MF.getRegInfo(); assert(RegInfo.isSSA()); if (!YamlMF.IsSSA) @@ -347,12 +366,28 @@ bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF, SMDiagnostic Error; // Parse the virtual register information. for (const auto &VReg : YamlMF.VirtualRegisters) { - const auto *RC = getRegClass(MF, VReg.Class.Value); - if (!RC) - return error(VReg.Class.SourceRange.Start, - Twine("use of undefined register class '") + - VReg.Class.Value + "'"); - unsigned Reg = RegInfo.createVirtualRegister(RC); + unsigned Reg; + if (StringRef(VReg.Class.Value).equals("_")) { + // This is a generic virtual register. + // The size will be set appropriately when we reach the definition. + Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1); + PFS.GenericVRegs.insert(Reg); + } else { + const auto *RC = getRegClass(MF, VReg.Class.Value); + if (RC) { + Reg = RegInfo.createVirtualRegister(RC); + } else { + const auto *RegBank = getRegBank(MF, VReg.Class.Value); + if (!RegBank) + return error( + VReg.Class.SourceRange.Start, + Twine("use of undefined register class or register bank '") + + VReg.Class.Value + "'"); + Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1); + RegInfo.setRegBank(Reg, *RegBank); + PFS.GenericVRegs.insert(Reg); + } + } if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg)) .second) return error(VReg.ID.SourceRange.Start, @@ -360,9 +395,8 @@ bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF, Twine(VReg.ID.Value) + "'"); if (!VReg.PreferredRegister.Value.empty()) { unsigned PreferredReg = 0; - if (parseNamedRegisterReference(PreferredReg, SM, MF, - VReg.PreferredRegister.Value, PFS, - IRSlots, Error)) + if (parseNamedRegisterReference(PFS, PreferredReg, + VReg.PreferredRegister.Value, Error)) return error(Error, VReg.PreferredRegister.SourceRange); RegInfo.setSimpleHint(Reg, PreferredReg); } @@ -371,13 +405,12 @@ bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF, // Parse the liveins. 
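// (Editorial aside: illustrative YAML, with field spellings assumed from the
// loop below:
//   liveins:
//     - { reg: '%edi', virtual-reg: '%0' }
// 'reg' is resolved with parseNamedRegisterReference and the optional
// 'virtual-reg' with parseVirtualRegisterReference.)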
for (const auto &LiveIn : YamlMF.LiveIns) { unsigned Reg = 0; - if (parseNamedRegisterReference(Reg, SM, MF, LiveIn.Register.Value, PFS, - IRSlots, Error)) + if (parseNamedRegisterReference(PFS, Reg, LiveIn.Register.Value, Error)) return error(Error, LiveIn.Register.SourceRange); unsigned VReg = 0; if (!LiveIn.VirtualRegister.Value.empty()) { - if (parseVirtualRegisterReference( - VReg, SM, MF, LiveIn.VirtualRegister.Value, PFS, IRSlots, Error)) + if (parseVirtualRegisterReference(PFS, VReg, LiveIn.VirtualRegister.Value, + Error)) return error(Error, LiveIn.VirtualRegister.SourceRange); } RegInfo.addLiveIn(Reg, VReg); @@ -389,8 +422,7 @@ bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF, return false; for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) { unsigned Reg = 0; - if (parseNamedRegisterReference(Reg, SM, MF, RegSource.Value, PFS, IRSlots, - Error)) + if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error)) return error(Error, RegSource.SourceRange); CalleeSavedRegisterMask[Reg] = true; } @@ -398,24 +430,25 @@ bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF, return false; } -void MIRParserImpl::inferRegisterInfo(MachineFunction &MF, +void MIRParserImpl::inferRegisterInfo(const PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) { if (YamlMF.CalleeSavedRegisters) return; - for (const MachineBasicBlock &MBB : MF) { + MachineRegisterInfo &MRI = PFS.MF.getRegInfo(); + for (const MachineBasicBlock &MBB : PFS.MF) { for (const MachineInstr &MI : MBB) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isRegMask()) continue; - MF.getRegInfo().addPhysRegsUsedFromRegMask(MO.getRegMask()); + MRI.addPhysRegsUsedFromRegMask(MO.getRegMask()); } } } } -bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF, - const yaml::MachineFunction &YamlMF, - PerFunctionMIParsingState &PFS) { +bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineFunction &YamlMF) { + MachineFunction &MF = PFS.MF; MachineFrameInfo &MFI = *MF.getFrameInfo(); const Function &F = *MF.getFunction(); const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; @@ -435,13 +468,13 @@ bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF, MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); if (!YamlMFI.SavePoint.Value.empty()) { MachineBasicBlock *MBB = nullptr; - if (parseMBBReference(MBB, YamlMFI.SavePoint, MF, PFS)) + if (parseMBBReference(PFS, MBB, YamlMFI.SavePoint)) return true; MFI.setSavePoint(MBB); } if (!YamlMFI.RestorePoint.Value.empty()) { MachineBasicBlock *MBB = nullptr; - if (parseMBBReference(MBB, YamlMFI.RestorePoint, MF, PFS)) + if (parseMBBReference(PFS, MBB, YamlMFI.RestorePoint)) return true; MFI.setRestorePoint(MBB); } @@ -462,7 +495,7 @@ bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF, return error(Object.ID.SourceRange.Start, Twine("redefinition of fixed stack object '%fixed-stack.") + Twine(Object.ID.Value) + "'"); - if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, + if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister, ObjectIdx)) return true; } @@ -493,12 +526,12 @@ bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF, return error(Object.ID.SourceRange.Start, Twine("redefinition of stack object '%stack.") + Twine(Object.ID.Value) + "'"); - if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, + if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister, ObjectIdx)) return 
true; if (Object.LocalOffset) MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue()); - if (parseStackObjectsDebugInfo(MF, PFS, Object, ObjectIdx)) + if (parseStackObjectsDebugInfo(PFS, Object, ObjectIdx)) return true; } MFI.setCalleeSavedInfo(CSIInfo); @@ -510,24 +543,21 @@ bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF, if (!YamlMFI.StackProtector.Value.empty()) { SMDiagnostic Error; int FI; - if (parseStackObjectReference(FI, SM, MF, YamlMFI.StackProtector.Value, PFS, - IRSlots, Error)) + if (parseStackObjectReference(PFS, FI, YamlMFI.StackProtector.Value, Error)) return error(Error, YamlMFI.StackProtector.SourceRange); MFI.setStackProtectorIndex(FI); } return false; } -bool MIRParserImpl::parseCalleeSavedRegister( - MachineFunction &MF, PerFunctionMIParsingState &PFS, +bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS, std::vector<CalleeSavedInfo> &CSIInfo, const yaml::StringValue &RegisterSource, int FrameIdx) { if (RegisterSource.Value.empty()) return false; unsigned Reg = 0; SMDiagnostic Error; - if (parseNamedRegisterReference(Reg, SM, MF, RegisterSource.Value, PFS, - IRSlots, Error)) + if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error)) return error(Error, RegisterSource.SourceRange); CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx)); return false; @@ -548,16 +578,15 @@ static bool typecheckMDNode(T *&Result, MDNode *Node, return false; } -bool MIRParserImpl::parseStackObjectsDebugInfo( - MachineFunction &MF, PerFunctionMIParsingState &PFS, +bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, const yaml::MachineStackObject &Object, int FrameIdx) { // Debug information can only be attached to stack objects; Fixed stack // objects aren't supported. 
assert(FrameIdx >= 0 && "Expected a stack object frame index"); MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr; - if (parseMDNode(Var, Object.DebugVar, MF, PFS) || - parseMDNode(Expr, Object.DebugExpr, MF, PFS) || - parseMDNode(Loc, Object.DebugLoc, MF, PFS)) + if (parseMDNode(PFS, Var, Object.DebugVar) || + parseMDNode(PFS, Expr, Object.DebugExpr) || + parseMDNode(PFS, Loc, Object.DebugLoc)) return true; if (!Var && !Expr && !Loc) return false; @@ -568,25 +597,24 @@ bool MIRParserImpl::parseStackObjectsDebugInfo( typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) || typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this)) return true; - MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); + PFS.MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); return false; } -bool MIRParserImpl::parseMDNode(MDNode *&Node, const yaml::StringValue &Source, - MachineFunction &MF, - const PerFunctionMIParsingState &PFS) { +bool MIRParserImpl::parseMDNode(const PerFunctionMIParsingState &PFS, + MDNode *&Node, const yaml::StringValue &Source) { if (Source.Value.empty()) return false; SMDiagnostic Error; - if (llvm::parseMDNode(Node, SM, MF, Source.Value, PFS, IRSlots, Error)) + if (llvm::parseMDNode(PFS, Node, Source.Value, Error)) return error(Error, Source.SourceRange); return false; } -bool MIRParserImpl::initializeConstantPool( - MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF, - const MachineFunction &MF, - DenseMap<unsigned, unsigned> &ConstantPoolSlots) { +bool MIRParserImpl::initializeConstantPool(PerFunctionMIParsingState &PFS, + MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF) { + DenseMap<unsigned, unsigned> &ConstantPoolSlots = PFS.ConstantPoolSlots; + const MachineFunction &MF = PFS.MF; const auto &M = *MF.getFunction()->getParent(); SMDiagnostic Error; for (const auto &YamlConstant : YamlMF.Constants) { @@ -608,15 +636,14 @@ bool MIRParserImpl::initializeConstantPool( return false; } -bool MIRParserImpl::initializeJumpTableInfo( - MachineFunction &MF, const yaml::MachineJumpTable &YamlJTI, - PerFunctionMIParsingState &PFS) { - MachineJumpTableInfo *JTI = MF.getOrCreateJumpTableInfo(YamlJTI.Kind); +bool MIRParserImpl::initializeJumpTableInfo(PerFunctionMIParsingState &PFS, + const yaml::MachineJumpTable &YamlJTI) { + MachineJumpTableInfo *JTI = PFS.MF.getOrCreateJumpTableInfo(YamlJTI.Kind); for (const auto &Entry : YamlJTI.Entries) { std::vector<MachineBasicBlock *> Blocks; for (const auto &MBBSource : Entry.Blocks) { MachineBasicBlock *MBB = nullptr; - if (parseMBBReference(MBB, MBBSource.Value, MF, PFS)) + if (parseMBBReference(PFS, MBB, MBBSource.Value)) return true; Blocks.push_back(MBB); } @@ -630,12 +657,11 @@ bool MIRParserImpl::initializeJumpTableInfo( return false; } -bool MIRParserImpl::parseMBBReference(MachineBasicBlock *&MBB, - const yaml::StringValue &Source, - MachineFunction &MF, - const PerFunctionMIParsingState &PFS) { +bool MIRParserImpl::parseMBBReference(const PerFunctionMIParsingState &PFS, + MachineBasicBlock *&MBB, + const yaml::StringValue &Source) { SMDiagnostic Error; - if (llvm::parseMBBReference(MBB, SM, MF, Source.Value, PFS, IRSlots, Error)) + if (llvm::parseMBBReference(PFS, MBB, Source.Value, Error)) return error(Error, Source.SourceRange); return false; } @@ -698,6 +724,21 @@ void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) { } } +void MIRParserImpl::initNames2RegBanks(const MachineFunction &MF) { + if 
(!Names2RegBanks.empty()) + return; + const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo(); + // If the target does not support GlobalISel, we may not have a + // register bank info. + if (!RBI) + return; + for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) { + const auto &RegBank = RBI->getRegBank(I); + Names2RegBanks.insert( + std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank)); + } +} + const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF, StringRef Name) { initNames2RegClasses(MF); @@ -707,6 +748,15 @@ const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF, return RegClassInfo->getValue(); } +const RegisterBank *MIRParserImpl::getRegBank(const MachineFunction &MF, + StringRef Name) { + initNames2RegBanks(MF); + auto RegBankInfo = Names2RegBanks.find(Name); + if (RegBankInfo == Names2RegBanks.end()) + return nullptr; + return RegBankInfo->getValue(); +} + MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl) : Impl(std::move(Impl)) {} diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp index 175cb0d..703c99d 100644 --- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp @@ -14,23 +14,25 @@ #include "MIRPrinter.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -118,7 +120,8 @@ public: void printOffset(int64_t Offset); void printTargetFlags(const MachineOperand &Op); void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, - unsigned I, bool ShouldPrintRegisterTies, bool IsDef = false); + unsigned I, bool ShouldPrintRegisterTies, + const MachineRegisterInfo *MRI = nullptr, bool IsDef = false); void print(const MachineMemOperand &Op); void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI); @@ -170,6 +173,9 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasInlineAsm = MF.hasInlineAsm(); + YamlMF.AllVRegsAllocated = MF.getProperties().hasProperty( + MachineFunctionProperties::Property::AllVRegsAllocated); + convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); ModuleSlotTracker MST(MF.getFunction()->getParent()); MST.incorporateFunction(*MF.getFunction()); @@ -206,8 +212,15 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, unsigned Reg = TargetRegisterInfo::index2VirtReg(I); yaml::VirtualRegisterDefinition VReg; VReg.ID = I; - VReg.Class = - StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); + if 
(RegInfo.getRegClassOrNull(Reg)) + VReg.Class = + StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); + else if (RegInfo.getRegBankOrNull(Reg)) + VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower(); + else { + VReg.Class = std::string("_"); + assert(RegInfo.getSize(Reg) && "Generic registers must have a size"); + } unsigned PreferredReg = RegInfo.getSimpleHint(Reg); if (PreferredReg) printReg(PreferredReg, VReg.PreferredRegister, TRI); @@ -525,7 +538,9 @@ static bool hasComplexRegisterTies(const MachineInstr &MI) { } void MIPrinter::print(const MachineInstr &MI) { - const auto &SubTarget = MI.getParent()->getParent()->getSubtarget(); + const auto *MF = MI.getParent()->getParent(); + const auto &MRI = MF->getRegInfo(); + const auto &SubTarget = MF->getSubtarget(); const auto *TRI = SubTarget.getRegisterInfo(); assert(TRI && "Expected target register info"); const auto *TII = SubTarget.getInstrInfo(); @@ -540,7 +555,8 @@ void MIPrinter::print(const MachineInstr &MI) { ++I) { if (I) OS << ", "; - print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, /*IsDef=*/true); + print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, &MRI, + /*IsDef=*/true); } if (I) @@ -548,6 +564,11 @@ void MIPrinter::print(const MachineInstr &MI) { if (MI.getFlag(MachineInstr::FrameSetup)) OS << "frame-setup "; OS << TII->getName(MI.getOpcode()); + if (isPreISelGenericOpcode(MI.getOpcode())) { + assert(MI.getType() && "Generic instructions must have a type"); + OS << ' '; + MI.getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true); + } if (I < E) OS << ' '; @@ -727,7 +748,8 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) { } void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, - unsigned I, bool ShouldPrintRegisterTies, bool IsDef) { + unsigned I, bool ShouldPrintRegisterTies, + const MachineRegisterInfo *MRI, bool IsDef) { printTargetFlags(Op); switch (Op.getType()) { case MachineOperand::MO_Register: @@ -754,6 +776,9 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, OS << ':' << TRI->getSubRegIndexName(Op.getSubReg()); if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef()) OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")"; + assert((!IsDef || MRI) && "for IsDef, MRI must be provided"); + if (IsDef && MRI->getSize(Op.getReg())) + OS << '(' << MRI->getSize(Op.getReg()) << ')'; break; case MachineOperand::MO_Immediate: OS << Op.getImm(); @@ -858,11 +883,12 @@ void MIPrinter::print(const MachineMemOperand &Op) { assert(Op.isStore() && "Non load machine operand must be a store"); OS << "store "; } - OS << Op.getSize() << (Op.isLoad() ? " from " : " into "); + OS << Op.getSize(); if (const Value *Val = Op.getValue()) { + OS << (Op.isLoad() ? " from " : " into "); printIRValueReference(*Val); - } else { - const PseudoSourceValue *PVal = Op.getPseudoValue(); + } else if (const PseudoSourceValue *PVal = Op.getPseudoValue()) { + OS << (Op.isLoad() ? 
" from " : " into "); assert(PVal && "Expected a pseudo source value"); switch (PVal->kind()) { case PseudoSourceValue::Stack: diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 85d544d..689dd07 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -13,7 +13,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" @@ -199,16 +198,6 @@ MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { return end(); } -const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { - // A block with a landing pad successor only has one other successor. - if (succ_size() > 2) - return nullptr; - for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) - if ((*I)->isEHPad()) - return *I; - return nullptr; -} - bool MachineBasicBlock::hasEHPadSuccessor() const { for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) if ((*I)->isEHPad()) @@ -217,7 +206,7 @@ bool MachineBasicBlock::hasEHPadSuccessor() const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void MachineBasicBlock::dump() const { +LLVM_DUMP_METHOD void MachineBasicBlock::dump() const { print(dbgs()); } #endif @@ -241,7 +230,8 @@ std::string MachineBasicBlock::getFullName() const { return Name; } -void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { +void MachineBasicBlock::print(raw_ostream &OS, const SlotIndexes *Indexes) + const { const MachineFunction *MF = getParent(); if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" @@ -255,7 +245,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, - SlotIndexes *Indexes) const { + const SlotIndexes *Indexes) const { const MachineFunction *MF = getParent(); if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" @@ -302,16 +292,16 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << '\n'; } - for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) { + for (auto &I : instrs()) { if (Indexes) { - if (Indexes->hasIndex(&*I)) - OS << Indexes->getInstructionIndex(&*I); + if (Indexes->hasIndex(I)) + OS << Indexes->getInstructionIndex(I); OS << '\t'; } OS << '\t'; - if (I->isInsideBundle()) + if (I.isInsideBundle()) OS << " * "; - I->print(OS, MST); + I.print(OS, MST); } // Print the successors of this block according to the CFG. @@ -414,24 +404,25 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { void MachineBasicBlock::updateTerminator() { const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); // A block with no successors has no concerns with fall-through edges. - if (this->succ_empty()) return; + if (this->succ_empty()) + return; MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; DebugLoc DL; // FIXME: this is nowhere - bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); + bool B = TII->analyzeBranch(*this, TBB, FBB, Cond); (void) B; assert(!B && "UpdateTerminators requires analyzable predecessors!"); if (Cond.empty()) { if (TBB) { - // The block has an unconditional branch. 
If its successor is now - // its layout successor, delete the branch. + // The block has an unconditional branch. If its successor is now its + // layout successor, delete the branch. if (isLayoutSuccessor(TBB)) TII->RemoveBranch(*this); } else { - // The block has an unconditional fallthrough. If its successor is not - // its layout successor, insert a branch. First we have to locate the - // only non-landing-pad successor, as that is the fallthrough block. + // The block has an unconditional fallthrough. If its successor is not its + // layout successor, insert a branch. First we have to locate the only + // non-landing-pad successor, as that is the fallthrough block. for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { if ((*SI)->isEHPad()) continue; @@ -439,8 +430,8 @@ void MachineBasicBlock::updateTerminator() { TBB = *SI; } - // If there is no non-landing-pad successor, the block has no - // fall-through edges to be concerned with. + // If there is no non-landing-pad successor, the block has no fall-through + // edges to be concerned with. if (!TBB) return; @@ -449,61 +440,73 @@ void MachineBasicBlock::updateTerminator() { if (!isLayoutSuccessor(TBB)) TII->InsertBranch(*this, TBB, nullptr, Cond, DL); } - } else { - if (FBB) { - // The block has a non-fallthrough conditional branch. If one of its - // successors is its layout successor, rewrite it to a fallthrough - // conditional branch. - if (isLayoutSuccessor(TBB)) { - if (TII->ReverseBranchCondition(Cond)) - return; - TII->RemoveBranch(*this); - TII->InsertBranch(*this, FBB, nullptr, Cond, DL); - } else if (isLayoutSuccessor(FBB)) { - TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, nullptr, Cond, DL); - } - } else { - // Walk through the successors and find the successor which is not - // a landing pad and is not the conditional branch destination (in TBB) - // as the fallthrough successor. - MachineBasicBlock *FallthroughBB = nullptr; - for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { - if ((*SI)->isEHPad() || *SI == TBB) - continue; - assert(!FallthroughBB && "Found more than one fallthrough successor."); - FallthroughBB = *SI; - } - if (!FallthroughBB && canFallThrough()) { - // We fallthrough to the same basic block as the conditional jump - // targets. Remove the conditional jump, leaving unconditional - // fallthrough. - // FIXME: This does not seem like a reasonable pattern to support, but - // it has been seen in the wild coming out of degenerate ARM test cases. - TII->RemoveBranch(*this); + return; + } - // Finally update the unconditional successor to be reached via a branch - // if it would not be reached by fallthrough. - if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + if (FBB) { + // The block has a non-fallthrough conditional branch. If one of its + // successors is its layout successor, rewrite it to a fallthrough + // conditional branch. + if (isLayoutSuccessor(TBB)) { + if (TII->ReverseBranchCondition(Cond)) return; - } + TII->RemoveBranch(*this); + TII->InsertBranch(*this, FBB, nullptr, Cond, DL); + } else if (isLayoutSuccessor(FBB)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + } + return; + } - // The block has a fallthrough conditional branch. - if (isLayoutSuccessor(TBB)) { - if (TII->ReverseBranchCondition(Cond)) { - // We can't reverse the condition, add an unconditional branch. 
- Cond.clear(); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); - return; - } - TII->RemoveBranch(*this); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); - } else if (!isLayoutSuccessor(FallthroughBB)) { - TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL); - } + // Walk through the successors and find the successor which is not a landing + // pad and is not the conditional branch destination (in TBB) as the + // fallthrough successor. + MachineBasicBlock *FallthroughBB = nullptr; + for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isEHPad() || *SI == TBB) + continue; + assert(!FallthroughBB && "Found more than one fallthrough successor."); + FallthroughBB = *SI; + } + + if (!FallthroughBB) { + if (canFallThrough()) { + // We fallthrough to the same basic block as the conditional jump targets. + // Remove the conditional jump, leaving unconditional fallthrough. + // FIXME: This does not seem like a reasonable pattern to support, but it + // has been seen in the wild coming out of degenerate ARM test cases. + TII->RemoveBranch(*this); + + // Finally update the unconditional successor to be reached via a branch if + // it would not be reached by fallthrough. + if (!isLayoutSuccessor(TBB)) + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + return; + } + + // We enter here iff exactly one successor is TBB which cannot fallthrough + // and the rest successors if any are EHPads. In this case, we need to + // change the conditional branch into unconditional branch. + TII->RemoveBranch(*this); + Cond.clear(); + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + return; + } + + // The block has a fallthrough conditional branch. + if (isLayoutSuccessor(TBB)) { + if (TII->ReverseBranchCondition(Cond)) { + // We can't reverse the condition, add an unconditional branch. + Cond.clear(); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); + return; } + TII->RemoveBranch(*this); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); + } else if (!isLayoutSuccessor(FallthroughBB)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL); } } @@ -685,13 +688,13 @@ bool MachineBasicBlock::canFallThrough() { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); - if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { + if (TII->analyzeBranch(*this, TBB, FBB, Cond)) { // If we couldn't analyze the branch, examine the last instruction. // If the block doesn't end in a known control barrier, assume fallthrough // is possible. The isPredicated check is needed because this code can be // called during IfConversion, where an instruction which is normally a // Barrier is predicated and thus no longer an actual control barrier. - return empty() || !back().isBarrier() || TII->isPredicated(&back()); + return empty() || !back().isBarrier() || TII->isPredicated(back()); } // If there is no branch, control always falls through. @@ -712,39 +715,14 @@ bool MachineBasicBlock::canFallThrough() { return FBB == nullptr; } -MachineBasicBlock * -MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { - // Splitting the critical edge to a landing pad block is non-trivial. Don't do - // it in this generic function. 
- if (Succ->isEHPad()) +MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, + Pass &P) { + if (!canSplitCriticalEdge(Succ)) return nullptr; MachineFunction *MF = getParent(); DebugLoc DL; // FIXME: this is nowhere - // Performance might be harmed on HW that implements branching using exec mask - // where both sides of the branches are always executed. - if (MF->getTarget().requiresStructuredCFG()) - return nullptr; - - // We may need to update this's terminator, but we can't do that if - // AnalyzeBranch fails. If this uses a jump table, we won't touch it. - const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; - SmallVector<MachineOperand, 4> Cond; - if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) - return nullptr; - - // Avoid bugpoint weirdness: A block may end with a conditional branch but - // jumps to the same MBB is either case. We have duplicate CFG edges in that - // case that we can't handle. Since this never happens in properly optimized - // code, just skip those edges. - if (TBB && TBB == FBB) { - DEBUG(dbgs() << "Won't split critical edge after degenerate BB#" - << getNumber() << '\n'); - return nullptr; - } - MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); MF->insert(std::next(MachineFunction::iterator(this)), NMBB); DEBUG(dbgs() << "Splitting critical edge:" @@ -752,8 +730,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { << " -- BB#" << NMBB->getNumber() << " -- BB#" << Succ->getNumber() << '\n'); - LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>(); - SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>(); + LiveIntervals *LIS = P.getAnalysisIfAvailable<LiveIntervals>(); + SlotIndexes *Indexes = P.getAnalysisIfAvailable<SlotIndexes>(); if (LIS) LIS->insertMBBInMaps(NMBB); else if (Indexes) @@ -762,7 +740,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // On some targets like Mips, branches may kill virtual registers. Make sure // that LiveVariables is properly updated after updateTerminator replaces the // terminators. - LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>(); + LiveVariables *LV = P.getAnalysisIfAvailable<LiveVariables>(); // Collect a list of virtual registers killed by the terminators. SmallVector<unsigned, 4> KilledRegs; @@ -777,7 +755,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { continue; unsigned Reg = OI->getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg) || - LV->getVarInfo(Reg).removeKill(MI)) { + LV->getVarInfo(Reg).removeKill(*MI)) { KilledRegs.push_back(Reg); DEBUG(dbgs() << "Removing terminator kill: " << *MI); OI->setIsKill(false); @@ -826,24 +804,24 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { E = Terminators.end(); I != E; ++I) { if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) == NewTerminators.end()) - Indexes->removeMachineInstrFromMaps(*I); + Indexes->removeMachineInstrFromMaps(**I); } } // Insert unconditional "jump Succ" instruction in NMBB if necessary. 
  NMBB->addSuccessor(Succ);
   if (!NMBB->isLayoutSuccessor(Succ)) {
-    Cond.clear();
+    SmallVector<MachineOperand, 4> Cond;
+    const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
     TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);
     if (Indexes) {
-      for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
-           I != E; ++I) {
+      for (MachineInstr &MI : NMBB->instrs()) {
         // Some instructions may have been moved to NMBB by updateTerminator(),
         // so we first remove any instruction that already has an index.
-        if (Indexes->hasIndex(&*I))
-          Indexes->removeMachineInstrFromMaps(&*I);
-        Indexes->insertMachineInstrInMaps(&*I);
+        if (Indexes->hasIndex(MI))
+          Indexes->removeMachineInstrFromMaps(MI);
+        Indexes->insertMachineInstrInMaps(MI);
       }
     }
   }
@@ -942,10 +920,10 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
   }
 
   if (MachineDominatorTree *MDT =
-      P->getAnalysisIfAvailable<MachineDominatorTree>())
+          P.getAnalysisIfAvailable<MachineDominatorTree>())
     MDT->recordSplitCriticalEdge(this, Succ, NMBB);
 
-  if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
+  if (MachineLoopInfo *MLI = P.getAnalysisIfAvailable<MachineLoopInfo>())
     if (MachineLoop *TIL = MLI->getLoopFor(this)) {
       // If one or the other blocks were not in a loop, the new block is not
       // either, and thus LI doesn't need to be updated.
@@ -975,6 +953,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
   return NMBB;
 }
 
+bool MachineBasicBlock::canSplitCriticalEdge(
+    const MachineBasicBlock *Succ) const {
+  // Splitting the critical edge to a landing pad block is non-trivial. Don't do
+  // it in this generic function.
+  if (Succ->isEHPad())
+    return false;
+
+  const MachineFunction *MF = getParent();
+
+  // Performance might be harmed on HW that implements branching using exec mask
+  // where both sides of the branches are always executed.
+  if (MF->getTarget().requiresStructuredCFG())
+    return false;
+
+  // We may need to update this's terminator, but we can't do that if
+  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
+  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 4> Cond;
+  // analyzeBranch should not modify this, since we did not allow modification.
+  if (TII->analyzeBranch(*const_cast<MachineBasicBlock *>(this), TBB, FBB, Cond,
+                         /*AllowModify*/ false))
+    return false;
+
+  // Avoid bugpoint weirdness: A block may end with a conditional branch but
+  // jumps to the same MBB in either case. We have duplicate CFG edges in that
+  // case that we can't handle. Since this never happens in properly optimized
+  // code, just skip those edges.
+  if (TBB && TBB == FBB) {
+    DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+                 << getNumber() << '\n');
+    return false;
+  }
+  return true;
+}
+
 /// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's
 /// neighboring instructions so the bundle won't be broken by removing MI.
 static void unbundleSingleMI(MachineInstr *MI) {
@@ -1200,7 +1214,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
     --I;
 
     MachineOperandIteratorBase::PhysRegInfo Info =
-        ConstMIOperands(I).analyzePhysReg(Reg, TRI);
+        ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
 
     // Defs happen after uses so they take precedence if both are present.
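// Editorial usage sketch (not part of the patch): the refactoring above splits
// the legality test out of SplitCriticalEdge(), and the pass is now taken by
// reference instead of by pointer. A caller written against the new interface
// might look like this; the helper name trySplitEdge is hypothetical.
static MachineBasicBlock *trySplitEdge(MachineBasicBlock *From,
                                       MachineBasicBlock *To, Pass &P) {
  // canSplitCriticalEdge() rejects landing-pad successors, targets that
  // require a structured CFG, terminators that analyzeBranch() cannot
  // handle, and degenerate conditional branches with TBB == FBB.
  if (!From->canSplitCriticalEdge(To))
    return nullptr;
  // Returns the new block spliced onto the From -> To edge (or nullptr),
  // updating LiveIntervals/SlotIndexes/LiveVariables when available.
  return From->SplitCriticalEdge(To, P);
}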
@@ -1208,8 +1222,15 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
       if (Info.DeadDef)
         return LQR_Dead;
       // Register is (at least partially) live after a def.
-      if (Info.Defined)
-        return LQR_Live;
+      if (Info.Defined) {
+        if (!Info.PartialDeadDef)
+          return LQR_Live;
+        // As soon as we see a partial definition (dead or not),
+        // we cannot tell if the value is partially live without
+        // tracking the lane masks. We are not going to do this,
+        // so fall back on the rest of the analysis.
+        break;
+      }
       // Register is dead after a full kill or clobber and no def.
       if (Info.Killed || Info.Clobbered)
         return LQR_Dead;
@@ -1238,7 +1259,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
   if (I != end()) {
     for (++I; I != end() && N > 0; ++I, --N) {
       MachineOperandIteratorBase::PhysRegInfo Info =
-          ConstMIOperands(I).analyzePhysReg(Reg, TRI);
+          ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
 
       // Register is live when we read it here.
       if (Info.Read)
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 9119e31..6c0f99f 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -20,43 +20,44 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "block-freq"
 
 #ifndef NDEBUG
-enum GVDAGType {
-  GVDT_None,
-  GVDT_Fraction,
-  GVDT_Integer
-};
-static cl::opt<GVDAGType>
-ViewMachineBlockFreqPropagationDAG("view-machine-block-freq-propagation-dags",
-                                   cl::Hidden,
-          cl::desc("Pop up a window to show a dag displaying how machine block "
-                   "frequencies propagate through the CFG."),
-          cl::values(
-            clEnumValN(GVDT_None, "none",
-                       "do not display graphs."),
-            clEnumValN(GVDT_Fraction, "fraction", "display a graph using the "
-                       "fractional block frequency representation."),
-            clEnumValN(GVDT_Integer, "integer", "display a graph using the raw "
-                       "integer fractional block frequency representation."),
-            clEnumValEnd));
+static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
+    "view-machine-block-freq-propagation-dags", cl::Hidden,
+    cl::desc("Pop up a window to show a dag displaying how machine block "
+             "frequencies propagate through the CFG."),
+    cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."),
+               clEnumValN(GVDT_Fraction, "fraction",
+                          "display a graph using the "
+                          "fractional block frequency representation."),
+               clEnumValN(GVDT_Integer, "integer",
+                          "display a graph using the raw "
+                          "integer fractional block frequency representation."),
+               clEnumValN(GVDT_Count, "count", "display a graph using the real "
+                                               "profile count if available."),
+
+               clEnumValEnd));
+
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+extern cl::opt<unsigned> ViewHotFreqPercent;
 
 namespace llvm {
 
-template <>
-struct GraphTraits<MachineBlockFrequencyInfo *> {
+template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
   typedef const MachineBasicBlock NodeType;
   typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
   typedef MachineFunction::const_iterator nodes_iterator;
 
-  static inline
-  const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) {
+  static inline const NodeType *
+  getEntryNode(const MachineBlockFrequencyInfo *G) {
     return &G->getFunction()->front();
   }
 
@@ -77,38 +78,33 @@ struct GraphTraits<MachineBlockFrequencyInfo *> {
   }
 };
 
-template<>
-struct DOTGraphTraits<MachineBlockFrequencyInfo*> :
-    public DefaultDOTGraphTraits {
-  explicit DOTGraphTraits(bool isSimple=false) :
-    DefaultDOTGraphTraits(isSimple) {}
-
-  static std::string getGraphName(const MachineBlockFrequencyInfo *G) {
-    return G->getFunction()->getName();
-  }
+typedef BFIDOTGraphTraitsBase<MachineBlockFrequencyInfo,
+                              MachineBranchProbabilityInfo>
+    MBFIDOTGraphTraitsBase;
+template <>
+struct DOTGraphTraits<MachineBlockFrequencyInfo *>
+    : public MBFIDOTGraphTraitsBase {
+  explicit DOTGraphTraits(bool isSimple = false)
+      : MBFIDOTGraphTraitsBase(isSimple) {}
 
   std::string getNodeLabel(const MachineBasicBlock *Node,
                            const MachineBlockFrequencyInfo *Graph) {
-    std::string Result;
-    raw_string_ostream OS(Result);
-
-    OS << Node->getName().str() << ":";
-    switch (ViewMachineBlockFreqPropagationDAG) {
-    case GVDT_Fraction:
-      Graph->printBlockFreq(OS, Node);
-      break;
-    case GVDT_Integer:
-      OS << Graph->getBlockFreq(Node).getFrequency();
-      break;
-    case GVDT_None:
-      llvm_unreachable("If we are not supposed to render a graph we should "
-                       "never reach this point.");
-    }
-
-    return Result;
+    return MBFIDOTGraphTraitsBase::getNodeLabel(
+        Node, Graph, ViewMachineBlockFreqPropagationDAG);
   }
-};
+
+  std::string getNodeAttributes(const MachineBasicBlock *Node,
+                                const MachineBlockFrequencyInfo *Graph) {
+    return MBFIDOTGraphTraitsBase::getNodeAttributes(Node, Graph,
+                                                     ViewHotFreqPercent);
+  }
+
+  std::string getEdgeAttributes(const MachineBasicBlock *Node, EdgeIter EI,
+                                const MachineBlockFrequencyInfo *MBFI) {
+    return MBFIDOTGraphTraitsBase::getEdgeAttributes(
+        Node, EI, MBFI, MBFI->getMBPI(), ViewHotFreqPercent);
+  }
+};
 } // end namespace llvm
 #endif
 
@@ -122,9 +118,8 @@ INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
 
 char MachineBlockFrequencyInfo::ID = 0;
 
-
-MachineBlockFrequencyInfo::
-MachineBlockFrequencyInfo() :MachineFunctionPass(ID) {
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo()
+    : MachineFunctionPass(ID) {
   initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
 }
 
@@ -145,7 +140,9 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
   MBFI.reset(new ImplType);
   MBFI->calculate(F, MBPI, MLI);
 #ifndef NDEBUG
-  if (ViewMachineBlockFreqPropagationDAG != GVDT_None) {
+  if (ViewMachineBlockFreqPropagationDAG != GVDT_None &&
+      (ViewBlockFreqFuncName.empty() ||
+       F.getName().equals(ViewBlockFreqFuncName))) {
     view();
   }
 #endif
@@ -163,19 +160,29 @@ void MachineBlockFrequencyInfo::view() const {
                     "MachineBlockFrequencyDAGs");
 #else
   errs() << "MachineBlockFrequencyInfo::view is only available in debug builds "
-    "on systems with Graphviz or gv!\n";
+            "on systems with Graphviz or gv!\n";
 #endif // NDEBUG
 }
 
-BlockFrequency MachineBlockFrequencyInfo::
-getBlockFreq(const MachineBasicBlock *MBB) const {
+BlockFrequency
+MachineBlockFrequencyInfo::getBlockFreq(const MachineBasicBlock *MBB) const {
   return MBFI ? MBFI->getBlockFreq(MBB) : 0;
 }
 
+Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
+    const MachineBasicBlock *MBB) const {
+  // Don't dereference MBFI before checking it; it may not have been computed.
+  const Function *F = MBFI ? MBFI->getFunction()->getFunction() : nullptr;
+  return MBFI ? MBFI->getBlockProfileCount(*F, MBB) : None;
+}
+
 const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
   return MBFI ? MBFI->getFunction() : nullptr;
 }
 
+const MachineBranchProbabilityInfo *MachineBlockFrequencyInfo::getMBPI() const {
+  return MBFI ?
&MBFI->getBPI() : nullptr; +} + raw_ostream & MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const { diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index f5e3056..03dda8b 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -26,6 +26,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "BranchFolding.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -62,10 +64,12 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks", "blocks in the function."), cl::init(0), cl::Hidden); -static cl::opt<unsigned> - AlignAllLoops("align-all-loops", - cl::desc("Force the alignment of all loops in the function."), - cl::init(0), cl::Hidden); +static cl::opt<unsigned> AlignAllNonFallThruBlocks( + "align-all-nofallthru-blocks", + cl::desc("Force the alignment of all " + "blocks that have no fall-through predecessors (i.e. don't add " + "nops that are executed)."), + cl::init(0), cl::Hidden); // FIXME: Find a good default for this flag and remove the flag. static cl::opt<unsigned> ExitBlockBias( @@ -97,10 +101,15 @@ static cl::opt<bool> cl::desc("Model the cost of loop rotation more " "precisely by using profile data."), cl::init(false), cl::Hidden); +static cl::opt<bool> + ForcePreciseRotationCost("force-precise-rotation-cost", + cl::desc("Force the use of precise cost " + "loop rotation strategy."), + cl::init(false), cl::Hidden); static cl::opt<unsigned> MisfetchCost( "misfetch-cost", - cl::desc("Cost that models the probablistic risk of an instruction " + cl::desc("Cost that models the probabilistic risk of an instruction " "misfetch due to a jump comparing to falling through, whose cost " "is zero."), cl::init(1), cl::Hidden); @@ -109,6 +118,15 @@ static cl::opt<unsigned> JumpInstCost("jump-inst-cost", cl::desc("Cost of jump instructions."), cl::init(1), cl::Hidden); +static cl::opt<bool> +BranchFoldPlacement("branch-fold-placement", + cl::desc("Perform branch folding during placement. " + "Reduces code size."), + cl::init(true), cl::Hidden); + +extern cl::opt<unsigned> StaticLikelyProb; +extern cl::opt<unsigned> ProfileLikelyProb; + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -149,7 +167,7 @@ public: /// function. It also registers itself as the chain that block participates /// in with the BlockToChain mapping. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) - : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { + : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) { assert(BB && "Cannot create a chain with a null basic block"); BlockToChain[BB] = this; } @@ -201,11 +219,16 @@ public: } #endif // NDEBUG - /// \brief Count of predecessors within the loop currently being processed. + /// \brief Count of predecessors of any block within the chain which have not + /// yet been scheduled. In general, we will delay scheduling this chain + /// until those predecessors are scheduled (or we find a sufficiently good + /// reason to override this heuristic.) Note that when forming loop chains, + /// blocks outside the loop are ignored and treated as if they were already + /// scheduled. 
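+  /// (Editorial example, not in the patch: for a chain {A, B} where A has
+  /// two predecessors outside the chain and B's only predecessor is A,
+  /// UnscheduledPredecessors starts at 2 and the chain becomes ready for
+  /// layout only once both outside predecessors have been placed.)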
/// - /// This count is updated at each loop we process to represent the number of - /// in-loop predecessors of this chain. - unsigned LoopPredecessors; + /// Note: This field is reinitialized multiple times - once for each loop, + /// and then once for the function as a whole. + unsigned UnscheduledPredecessors; }; } @@ -214,14 +237,21 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A typedef for a block filter set. typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet; + /// \brief work lists of blocks that are ready to be laid out + SmallVector<MachineBasicBlock *, 16> BlockWorkList; + SmallVector<MachineBasicBlock *, 16> EHPadWorkList; + + /// \brief Machine Function + MachineFunction *F; + /// \brief A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; /// \brief A handle to the function-wide block frequency pass. - const MachineBlockFrequencyInfo *MBFI; + std::unique_ptr<BranchFolder::MBFIWrapper> MBFI; /// \brief A handle to the loop info. - const MachineLoopInfo *MLI; + MachineLoopInfo *MLI; /// \brief A handle to the target's instruction info. const TargetInstrInfo *TII; @@ -254,33 +284,56 @@ class MachineBlockPlacement : public MachineFunctionPass { DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, - SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = nullptr); + BranchProbability + collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain, + const BlockFilterSet *BlockFilter, + SmallVector<MachineBasicBlock *, 4> &Successors); + bool shouldPredBlockBeOutlined(MachineBasicBlock *BB, MachineBasicBlock *Succ, + BlockChain &Chain, + const BlockFilterSet *BlockFilter, + BranchProbability SuccProb, + BranchProbability HotProb); + bool + hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ, + BlockChain &SuccChain, BranchProbability SuccProb, + BranchProbability RealSuccProb, BlockChain &Chain, + const BlockFilterSet *BlockFilter); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter); MachineBasicBlock * selectBestCandidateBlock(BlockChain &Chain, - SmallVectorImpl<MachineBasicBlock *> &WorkList, - const BlockFilterSet *BlockFilter); + SmallVectorImpl<MachineBasicBlock *> &WorkList); MachineBasicBlock * - getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain, + getFirstUnplacedBlock(const BlockChain &PlacedChain, MachineFunction::iterator &PrevUnplacedBlockIt, const BlockFilterSet *BlockFilter); + + /// \brief Add a basic block to the work list if it is appropriate. + /// + /// If the optional parameter BlockFilter is provided, only MBB + /// present in the set will be added to the worklist. If nullptr + /// is provided, no filtering occurs. 
+ void fillWorkLists(MachineBasicBlock *MBB, + SmallPtrSetImpl<BlockChain *> &UpdatedPreds, + const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, - SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); - MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L, + MachineBasicBlock *findBestLoopExit(MachineLoop &L, const BlockFilterSet &LoopBlockSet); - BlockFilterSet collectLoopBlockSet(MachineFunction &F, MachineLoop &L); - void buildLoopChains(MachineFunction &F, MachineLoop &L); + BlockFilterSet collectLoopBlockSet(MachineLoop &L); + void buildLoopChains(MachineLoop &L); void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, const BlockFilterSet &LoopBlockSet); void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet); - void buildCFGChains(MachineFunction &F); + void collectMustExecuteBBs(); + void buildCFGChains(); + void optimizeBranches(); + void alignBlocks(); public: static char ID; // Pass identification, replacement for typeid @@ -295,6 +348,7 @@ public: AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -319,18 +373,7 @@ static std::string getBlockName(MachineBasicBlock *BB) { std::string Result; raw_string_ostream OS(Result); OS << "BB#" << BB->getNumber(); - OS << " (derived from LLVM BB '" << BB->getName() << "')"; - OS.flush(); - return Result; -} - -/// \brief Helper to print the number of a MBB. -/// -/// Only used by debug logging. -static std::string getBlockNum(MachineBasicBlock *BB) { - std::string Result; - raw_string_ostream OS(Result); - OS << "BB#" << BB->getNumber(); + OS << " ('" << BB->getName() << "')"; OS.flush(); return Result; } @@ -344,7 +387,6 @@ static std::string getBlockNum(MachineBasicBlock *BB) { /// chain which reach the zero-predecessor state to the worklist passed in. void MachineBlockPlacement::markChainSuccessors( BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, - SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { // Walk all the blocks in this chain, marking their successors as having // a predecessor placed. @@ -363,30 +405,26 @@ void MachineBlockPlacement::markChainSuccessors( // This is a cross-chain edge that is within the loop, so decrement the // loop predecessor count of the destination chain. - if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0) - BlockWorkList.push_back(*SuccChain.begin()); + if (SuccChain.UnscheduledPredecessors == 0 || + --SuccChain.UnscheduledPredecessors > 0) + continue; + + auto *MBB = *SuccChain.begin(); + if (MBB->isEHPad()) + EHPadWorkList.push_back(MBB); + else + BlockWorkList.push_back(MBB); } } } -/// \brief Select the best successor for a block. -/// -/// This looks across all successors of a particular block and attempts to -/// select the "best" one to be the layout successor. It only considers direct -/// successors which also pass the block filter. It will attempt to avoid -/// breaking CFG structure, but cave and break such structures in the case of -/// very hot successor edges. -/// -/// \returns The best successor block found, or null if none are viable. 
-MachineBasicBlock * -MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, - BlockChain &Chain, - const BlockFilterSet *BlockFilter) { - const BranchProbability HotProb(4, 5); // 80% - - MachineBasicBlock *BestSucc = nullptr; - auto BestProb = BranchProbability::getZero(); - +/// This helper function collects the set of successors of block +/// \p BB that are allowed to be its layout successors, and return +/// the total branch probability of edges from \p BB to those +/// blocks. +BranchProbability MachineBlockPlacement::collectViableSuccessors( + MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter, + SmallVector<MachineBasicBlock *, 4> &Successors) { // Adjust edge probabilities by excluding edges pointing to blocks that is // either not in BlockFilter or is already in the current chain. Consider the // following CFG: @@ -400,20 +438,17 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after // A->C is chosen as a fall-through, D won't be selected as a successor of C // due to CFG constraint (the probability of C->D is not greater than - // HotProb). If we exclude E that is not in BlockFilter when calculating the - // probability of C->D, D will be selected and we will get A C D B as the - // layout of this loop. + // HotProb to break top-order). If we exclude E that is not in BlockFilter + // when calculating the probability of C->D, D will be selected and we + // will get A C D B as the layout of this loop. auto AdjustedSumProb = BranchProbability::getOne(); - SmallVector<MachineBasicBlock *, 4> Successors; for (MachineBasicBlock *Succ : BB->successors()) { bool SkipSucc = false; - if (BlockFilter && !BlockFilter->count(Succ)) { + if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) { SkipSucc = true; } else { BlockChain *SuccChain = BlockToChain[Succ]; if (SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(Succ) - << " -> Already merged!\n"); SkipSucc = true; } else if (Succ != *SuccChain->begin()) { DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); @@ -426,78 +461,267 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, Successors.push_back(Succ); } - DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); - for (MachineBasicBlock *Succ : Successors) { - BranchProbability SuccProb; - uint32_t SuccProbN = MBPI->getEdgeProbability(BB, Succ).getNumerator(); - uint32_t SuccProbD = AdjustedSumProb.getNumerator(); - if (SuccProbN >= SuccProbD) - SuccProb = BranchProbability::getOne(); - else - SuccProb = BranchProbability(SuccProbN, SuccProbD); - - // If we outline optional branches, look whether Succ is unavoidable, i.e. - // dominates all terminators of the MachineFunction. If it does, other - // successors must be optional. Don't do this for cold branches. - if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() && - UnavoidableBlocks.count(Succ) > 0) { - auto HasShortOptionalBranch = [&]() { - for (MachineBasicBlock *Pred : Succ->predecessors()) { - // Check whether there is an unplaced optional branch. - if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || - BlockToChain[Pred] == &Chain) - continue; - // Check whether the optional branch has exactly one BB. - if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB) - continue; - // Check whether the optional branch is small. 
-          if (Pred->size() < OutlineOptionalThreshold)
-            return true;
-        }
+  return AdjustedSumProb;
+}
+
+/// The helper function returns the branch probability that is adjusted
+/// or normalized over the new total \p AdjustedSumProb.
+static BranchProbability
+getAdjustedProbability(BranchProbability OrigProb,
+                       BranchProbability AdjustedSumProb) {
+  BranchProbability SuccProb;
+  uint32_t SuccProbN = OrigProb.getNumerator();
+  uint32_t SuccProbD = AdjustedSumProb.getNumerator();
+  if (SuccProbN >= SuccProbD)
+    SuccProb = BranchProbability::getOne();
+  else
+    SuccProb = BranchProbability(SuccProbN, SuccProbD);
+
+  return SuccProb;
+}
+
+/// When the option OutlineOptionalBranches is on, this method
+/// checks if the fallthrough candidate block \p Succ (of block
+/// \p BB) also has other unscheduled predecessor blocks which
+/// are also successors of \p BB (forming a triangular-shaped CFG).
+/// If no such predecessor is small, it returns true.
+/// The caller can choose to select \p Succ as the layout successor
+/// so that \p Succ's predecessors (optional branches) can be
+/// outlined.
+/// FIXME: fold this with more general layout cost analysis.
+bool MachineBlockPlacement::shouldPredBlockBeOutlined(
+    MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
+    const BlockFilterSet *BlockFilter, BranchProbability SuccProb,
+    BranchProbability HotProb) {
+  if (!OutlineOptionalBranches)
+    return false;
+  // If we outline optional branches, look whether Succ is unavoidable, i.e.
+  // dominates all terminators of the MachineFunction. If it does, other
+  // successors must be optional. Don't do this for cold branches.
+  if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {
+    for (MachineBasicBlock *Pred : Succ->predecessors()) {
+      // Check whether there is an unplaced optional branch.
+      if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
+          BlockToChain[Pred] == &Chain)
+        continue;
+      // Check whether the optional branch has exactly one BB.
+      if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+        continue;
+      // Check whether the optional branch is small.
+      if (Pred->size() < OutlineOptionalThreshold)
+        return false;
+    }
+    return true;
+  } else
+    return false;
+}
+
+// When profile is not present, return the StaticLikelyProb.
+// When profile is available, we need to handle the triangle-shape CFG.
+static BranchProbability getLayoutSuccessorProbThreshold(
+    MachineBasicBlock *BB) {
+  if (!BB->getParent()->getFunction()->getEntryCount())
+    return BranchProbability(StaticLikelyProb, 100);
+  if (BB->succ_size() == 2) {
+    const MachineBasicBlock *Succ1 = *BB->succ_begin();
+    const MachineBasicBlock *Succ2 = *(BB->succ_begin() + 1);
+    if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
+      /* See case 1 below for the cost analysis. For BB->Succ to
+       * be taken with smaller cost, the following needs to hold:
+       *   Prob(BB->Succ) > 2 * Prob(BB->Pred)
+       * So the threshold T satisfies
+       *   T = 2 * Prob(BB->Pred). Since T + Prob(BB->Pred) == 1,
+       * we have T + T/2 = 1, i.e. T = 2/3.
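+       * (Editorial worked check, not in the original patch: at the
+       * break-even point T = 2/3 we get Prob(BB->Succ) = 2/3 and
+       * Prob(BB->Pred) = 1/3, so Prob(BB->Succ) = 2 * Prob(BB->Pred)
+       * holds exactly, and any stronger bias toward Succ justifies
+       * breaking the topological order.)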
+       * Also adding user specified branch bias, we have
+       *   T = (2/3)*(ProfileLikelyProb/50)
+       *     = (2*ProfileLikelyProb)/150
+       */
+      return BranchProbability(2 * ProfileLikelyProb, 150);
+    }
+  }
+  return BranchProbability(ProfileLikelyProb, 100);
+}
+
+/// Checks to see if the layout candidate block \p Succ has a better layout
+/// predecessor than \c BB. If yes, returns true.
+bool MachineBlockPlacement::hasBetterLayoutPredecessor(
+    MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
+    BranchProbability SuccProb, BranchProbability RealSuccProb,
+    BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+
+  // There isn't a better layout when there are no unscheduled predecessors.
+  if (SuccChain.UnscheduledPredecessors == 0)
+    return false;
+
+  // There are two basic scenarios here:
+  // -------------------------------------
+  // Case 1: triangular shape CFG (if-then):
+  //          BB
+  //          | \
+  //          |  \
+  //          |   Pred
+  //          |   /
+  //          Succ
+  // In this case, we are evaluating whether to select the edge BB->Succ,
+  // i.e. set Succ as the layout successor of BB. Picking Succ as BB's
+  // successor breaks the CFG constraints (FIXME: define these constraints).
+  // With this layout, the Pred BB
+  // is forced to be outlined, so the overall cost will be the cost of the
+  // branch taken from BB to Pred, plus the cost of the taken branch back
+  // from Pred to Succ, as well as the additional cost associated
+  // with the needed unconditional jump instruction from Pred to Succ.
+
+  // The cost of the topological order layout is the taken branch cost
+  // from BB to Succ, so to make BB->Succ a viable candidate, the following
+  // must hold:
+  //     2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
+  //      < freq(BB->Succ) * taken_branch_cost.
+  // Ignoring unconditional jump cost, we get
+  //    freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
+  //    prob(BB->Succ) > 2 * prob(BB->Pred)
+  //
+  // When real profile data is available, we can precisely compute the
+  // probability threshold that is needed for edge BB->Succ to be considered.
+  // Without profile data, the heuristic requires the branch bias to be
+  // a lot larger to make sure the signal is very strong (e.g. 80% default).
+  // -----------------------------------------------------------------
+  // Case 2: diamond like CFG (if-then-else):
+  //     S
+  //    / \
+  //   |   \
+  //  BB    Pred
+  //   \    /
+  //    Succ
+  //    ..
+  //
+  // The current block is BB and edge BB->Succ is now being evaluated.
+  // Note that edge S->BB was previously already selected because
+  // prob(S->BB) > prob(S->Pred).
+  // At this point, 2 blocks can be placed after BB: Pred or Succ. If we
+  // choose Pred, we will have a topological ordering as shown on the left
+  // in the picture below.
+  // If we choose Succ, we have the solution as shown
+  // on the right:
+  //
+  // topo-order:
+  //
+  //     S-----                     ---S
+  //     |    |                     |  |
+  //  ---BB   |                     |  BB
+  //  |       |                     |  |
+  //  |  pred--                     |  Succ--
+  //  |  |                          |       |
+  //  ---succ                       ---pred--
+  //
+  // cost = freq(S->Pred) + freq(BB->Succ)    cost = 2 * freq(S->Pred)
+  //      = freq(S->Pred) + freq(S->BB)
+  //
+  // If we have profile data (i.e, branch probabilities can be trusted), the
+  // cost (number of taken branches) with layout S->BB->Succ->Pred is 2 *
+  // freq(S->Pred) while the cost of topo order is freq(S->Pred) + freq(S->BB).
+  // We know Prob(S->BB) > Prob(S->Pred), so freq(S->BB) > freq(S->Pred), which
+  // means the cost of topological order is greater.
+  // When profile data is not available, however, we need to be more
+  // conservative. If the branch prediction is wrong, breaking the topo-order
+  // will actually yield a layout with large cost. For this reason, we need a
+  // strongly biased branch at block S with Prob(S->BB) in order to select
+  // BB->Succ. This is equivalent to looking at the CFG backward along the
+  // backward edge: Prob(Succ->BB) needs to be >= HotProb in order to be
+  // selected (without profile data).
+
+  BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
+
+  // Forward checking. For case 2, SuccProb will be 1.
+  if (SuccProb < HotProb) {
+    DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> " << SuccProb
+                 << " (prob) (CFG conflict)\n");
+    return true;
+  }
+
+  // Make sure that a hot successor doesn't have a globally more
+  // important predecessor.
+  BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
+  bool BadCFGConflict = false;
+
+  for (MachineBasicBlock *Pred : Succ->predecessors()) {
+    if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
+        (BlockFilter && !BlockFilter->count(Pred)) ||
+        BlockToChain[Pred] == &Chain)
+      continue;
+    // Do backward checking. For case 1, it is actually a redundant check. For
+    // case 2 above, we need a backward checking to filter out edges that are
+    // not 'strongly' biased. With profile data available, the check is mostly
+    // redundant too (when threshold prob is set at 50%) unless S has more than
+    // two successors.
+    // BB  Pred
+    //  \ /
+    //  Succ
+    // We select edge BB->Succ if
+    //      freq(BB->Succ) > freq(Succ) * HotProb
+    // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
+    // HotProb
+    // i.e. freq(BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
+    BlockFrequency PredEdgeFreq =
+        MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
+    if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
+      BadCFGConflict = true;
+      break;
     }
+  }
+
+  if (BadCFGConflict) {
     DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> " << SuccProb
-                 << " (prob)"
-                 << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
-                 << "\n");
+                 << " (prob) (non-cold CFG conflict)\n");
+    return true;
+  }
+
+  return false;
+}
+
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
+MachineBasicBlock *
+MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
+                                           BlockChain &Chain,
+                                           const BlockFilterSet *BlockFilter) {
+  const BranchProbability HotProb(StaticLikelyProb, 100);
+
+  MachineBasicBlock *BestSucc = nullptr;
+  auto BestProb = BranchProbability::getZero();
+
+  SmallVector<MachineBasicBlock *, 4> Successors;
+  auto AdjustedSumProb =
+      collectViableSuccessors(BB, Chain, BlockFilter, Successors);
+
+  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+  for (MachineBasicBlock *Succ : Successors) {
+    auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
+    BranchProbability SuccProb =
+        getAdjustedProbability(RealSuccProb, AdjustedSumProb);
+
+    // This heuristic is off by default.
+    if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
+                                  HotProb))
+      return Succ;
+
+    BlockChain &SuccChain = *BlockToChain[Succ];
+    // Skip the edge \c BB->Succ if block \c Succ has a better layout
+    // predecessor that yields lower global cost.
+    if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
+                                   Chain, BlockFilter))
+      continue;
+
+    DEBUG(
+        dbgs() << "    " << getBlockName(Succ) << " -> " << SuccProb
+               << " (prob)"
+               << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
+               << "\n");
     if (BestSucc && BestProb >= SuccProb)
       continue;
     BestSucc = Succ;
@@ -513,12 +737,11 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
 /// profitable only really makes sense in the context of a loop. This returns
 /// the most frequently visited block in the worklist, which in the case of
 /// a loop, is the one most desirable to be physically close to the rest of the
-/// loop body in order to improve icache behavior.
+/// loop body in order to improve i-cache behavior.
 ///
 /// \returns The best block found, or null if none are viable.
 MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
-    BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
-    const BlockFilterSet *BlockFilter) {
+    BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
   // Once we need to walk the worklist looking for a candidate, cleanup the
   // worklist of already placed entries.
   // FIXME: If this shows up on profiles, it could be folded (at the cost of
@@ -529,24 +752,51 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
                  }),
                  WorkList.end());
 
+  if (WorkList.empty())
+    return nullptr;
+
+  bool IsEHPad = WorkList[0]->isEHPad();
+
   MachineBasicBlock *BestBlock = nullptr;
   BlockFrequency BestFreq;
   for (MachineBasicBlock *MBB : WorkList) {
+    assert(MBB->isEHPad() == IsEHPad);
+
     BlockChain &SuccChain = *BlockToChain[MBB];
-    if (&SuccChain == &Chain) {
-      DEBUG(dbgs() << "    " << getBlockName(MBB) << " -> Already merged!\n");
+    if (&SuccChain == &Chain)
       continue;
-    }
-    assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+    assert(SuccChain.UnscheduledPredecessors == 0 && "Found CFG-violating block");
 
     BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
     DEBUG(dbgs() << "    " << getBlockName(MBB) << " -> ";
          MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
-    if (BestBlock && BestFreq >= CandidateFreq)
+
+    // For EH pads, we lay out the least probable first so as to avoid jumping
+    // back from the least probable landing pads to more probable ones.
+    //
+    // FIXME: Using probability is probably (!) not the best way to achieve
+    // this. We should probably have a more principled approach to layout
+    // cleanup code.
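+    // (Editorial note, not part of the patch: the selection test below,
+    //   IsEHPad ^ (BestFreq >= CandidateFreq),
+    // keeps the highest-frequency candidate for normal blocks but the
+    // lowest-frequency candidate for EH pads, encoding both rules in a
+    // single comparison.)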
+ // + // The goal is to get: + // + // +--------------------------+ + // | V + // InnerLp -> InnerCleanup OuterLp -> OuterCleanup -> Resume + // + // Rather than: + // + // +-------------------------------------+ + // V | + // OuterLp -> OuterCleanup -> Resume InnerLp -> InnerCleanup + if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq))) continue; + BestBlock = MBB; BestFreq = CandidateFreq; } + return BestBlock; } @@ -558,10 +808,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( /// LastUnplacedBlockIt. We update this iterator on each call to avoid /// re-scanning the entire sequence on repeated calls to this routine. MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( - MachineFunction &F, const BlockChain &PlacedChain, + const BlockChain &PlacedChain, MachineFunction::iterator &PrevUnplacedBlockIt, const BlockFilterSet *BlockFilter) { - for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; + for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E; ++I) { if (BlockFilter && !BlockFilter->count(&*I)) continue; @@ -576,22 +826,51 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( return nullptr; } +void MachineBlockPlacement::fillWorkLists( + MachineBasicBlock *MBB, + SmallPtrSetImpl<BlockChain *> &UpdatedPreds, + const BlockFilterSet *BlockFilter = nullptr) { + BlockChain &Chain = *BlockToChain[MBB]; + if (!UpdatedPreds.insert(&Chain).second) + return; + + assert(Chain.UnscheduledPredecessors == 0); + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockFilter && !BlockFilter->count(Pred)) + continue; + if (BlockToChain[Pred] == &Chain) + continue; + ++Chain.UnscheduledPredecessors; + } + } + + if (Chain.UnscheduledPredecessors != 0) + return; + + MBB = *Chain.begin(); + if (MBB->isEHPad()) + EHPadWorkList.push_back(MBB); + else + BlockWorkList.push_back(MBB); +} + void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, - SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { - assert(BB); - assert(BlockToChain[BB] == &Chain); - MachineFunction &F = *BB->getParent(); - MachineFunction::iterator PrevUnplacedBlockIt = F.begin(); + assert(BB && "BB must not be null.\n"); + assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n"); + MachineFunction::iterator PrevUnplacedBlockIt = F->begin(); MachineBasicBlock *LoopHeaderBB = BB; - markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); + markChainSuccessors(Chain, LoopHeaderBB, BlockFilter); BB = *std::prev(Chain.end()); for (;;) { - assert(BB); - assert(BlockToChain[BB] == &Chain); - assert(*std::prev(Chain.end()) == BB); + assert(BB && "null block found at end of chain in loop."); + assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop."); + assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain."); + // Look for the best viable successor if there is one to place immediately // after this block. @@ -601,11 +880,12 @@ void MachineBlockPlacement::buildChain( // block among those we've identified as not violating the loop's CFG at // this point. This won't be a fallthrough, but it will increase locality. 
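     // (Editorial note, not part of the patch: the overall fallback order in
     // the code below is best viable successor, then the normal-block
     // worklist, then the EH-pad worklist, and finally the first unplaced
     // block in function order, so EH pads are only chosen when no ordinary
     // block is ready.)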
if (!BestSucc) - BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); + BestSucc = selectBestCandidateBlock(Chain, BlockWorkList); + if (!BestSucc) + BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList); if (!BestSucc) { - BestSucc = - getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter); + BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter); if (!BestSucc) break; @@ -615,18 +895,18 @@ void MachineBlockPlacement::buildChain( // Place this block, updating the datastructures to reflect its placement. BlockChain &SuccChain = *BlockToChain[BestSucc]; - // Zero out LoopPredecessors for the successor we're about to merge in case + // Zero out UnscheduledPredecessors for the successor we're about to merge in case // we selected a successor that didn't fit naturally into the CFG. - SuccChain.LoopPredecessors = 0; - DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to " - << getBlockNum(BestSucc) << "\n"); - markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); + SuccChain.UnscheduledPredecessors = 0; + DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to " + << getBlockName(BestSucc) << "\n"); + markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter); Chain.merge(BestSucc, &SuccChain); BB = *std::prev(Chain.end()); } DEBUG(dbgs() << "Finished forming chain for header block " - << getBlockNum(*Chain.begin()) << "\n"); + << getBlockName(*Chain.begin()) << "\n"); } /// \brief Find the best loop top block for layout. @@ -673,8 +953,10 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, } // If no direct predecessor is fine, just use the loop header. - if (!BestPred) + if (!BestPred) { + DEBUG(dbgs() << " final top unchanged\n"); return L.getHeader(); + } // Walk backwards through any straight line of predecessors. while (BestPred->pred_size() == 1 && @@ -692,7 +974,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, /// block to layout at the top of the loop. Typically this is done to maximize /// fallthrough opportunities. MachineBasicBlock * -MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, +MachineBlockPlacement::findBestLoopExit(MachineLoop &L, const BlockFilterSet &LoopBlockSet) { // We don't want to layout the loop linearly in all cases. If the loop header // is just a normal basic block in the loop, we want to look for what block @@ -710,7 +992,7 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, unsigned BestExitLoopDepth = 0; MachineBasicBlock *ExitingBB = nullptr; // If there are exits to outer loops, loop rotation can severely limit - // fallthrough opportunites unless it selects such an exit. Keep a set of + // fallthrough opportunities unless it selects such an exit. Keep a set of // blocks where rotating to exit with that block will reach an outer loop. SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; @@ -780,7 +1062,6 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, // Restore the old exiting state, no viable looping successor was found. 
ExitingBB = OldExitingBB; BestExitEdgeFreq = OldBestExitEdgeFreq; - continue; } } // Without a candidate exiting block or with only a single block in the @@ -973,7 +1254,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( } } - DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockNum(*Iter) + DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockName(*Iter) << " to the top: " << Cost.getFrequency() << "\n"); if (Cost < SmallestRotationCost) { @@ -983,7 +1264,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( } if (RotationPos != LoopChain.end()) { - DEBUG(dbgs() << "Rotate loop by making " << getBlockNum(*RotationPos) + DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos) << " to the top\n"); std::rotate(LoopChain.begin(), RotationPos, LoopChain.end()); } @@ -994,7 +1275,7 @@ void MachineBlockPlacement::rotateLoopWithProfile( /// When profile data is available, exclude cold blocks from the returned set; /// otherwise, collect all blocks in the loop. MachineBlockPlacement::BlockFilterSet -MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) { +MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) { BlockFilterSet LoopBlockSet; // Filter cold blocks off from LoopBlockSet when profile data is available. @@ -1006,7 +1287,7 @@ MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) { // will be merged into the first outer loop chain for which this block is not // cold anymore. This needs precise profile data and we only do this when // profile data is available. - if (F.getFunction()->getEntryCount()) { + if (F->getFunction()->getEntryCount()) { BlockFrequency LoopFreq(0); for (auto LoopPred : L.getHeader()->predecessors()) if (!L.contains(LoopPred)) @@ -1031,21 +1312,22 @@ MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) { /// as much as possible. We can then stitch the chains together in a way which /// both preserves the topological structure and minimizes taken conditional /// branches. -void MachineBlockPlacement::buildLoopChains(MachineFunction &F, - MachineLoop &L) { +void MachineBlockPlacement::buildLoopChains(MachineLoop &L) { // First recurse through any nested loops, building chains for those inner // loops. for (MachineLoop *InnerLoop : L) - buildLoopChains(F, *InnerLoop); + buildLoopChains(*InnerLoop); - SmallVector<MachineBasicBlock *, 16> BlockWorkList; - BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L); + assert(BlockWorkList.empty()); + assert(EHPadWorkList.empty()); + BlockFilterSet LoopBlockSet = collectLoopBlockSet(L); // Check if we have profile data for this function. If yes, we will rotate // this loop by modeling costs more precisely which requires the profile data // for better layout. bool RotateLoopWithProfile = - PreciseRotationCost && F.getFunction()->getEntryCount(); + ForcePreciseRotationCost || + (PreciseRotationCost && F->getFunction()->getEntryCount()); // First check to see if there is an obviously preferable top block for the // loop. This will default to the header, but may end up as one of the @@ -1060,7 +1342,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, // branches by placing an exit edge at the bottom. 
  MachineBasicBlock *ExitingBB = nullptr;
   if (!RotateLoopWithProfile && LoopTop == L.getHeader())
-    ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
+    ExitingBB = findBestLoopExit(L, LoopBlockSet);
 
   BlockChain &LoopChain = *BlockToChain[LoopTop];
 
@@ -1068,29 +1350,13 @@
   // walk the blocks, and use a set to prevent visiting a particular chain
   // twice.
   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
-  assert(LoopChain.LoopPredecessors == 0);
+  assert(LoopChain.UnscheduledPredecessors == 0);
   UpdatedPreds.insert(&LoopChain);
 
-  for (MachineBasicBlock *LoopBB : LoopBlockSet) {
-    BlockChain &Chain = *BlockToChain[LoopBB];
-    if (!UpdatedPreds.insert(&Chain).second)
-      continue;
+  for (MachineBasicBlock *LoopBB : LoopBlockSet)
+    fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
 
-    assert(Chain.LoopPredecessors == 0);
-    for (MachineBasicBlock *ChainBB : Chain) {
-      assert(BlockToChain[ChainBB] == &Chain);
-      for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
-        if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred))
-          continue;
-        ++Chain.LoopPredecessors;
-      }
-    }
-
-    if (Chain.LoopPredecessors == 0)
-      BlockWorkList.push_back(*Chain.begin());
-  }
-
-  buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet);
+  buildChain(LoopTop, LoopChain, &LoopBlockSet);
 
   if (RotateLoopWithProfile)
     rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
@@ -1100,7 +1366,7 @@
   DEBUG({
     // Crash at the end so we get all of the debugging output first.
     bool BadLoop = false;
-    if (LoopChain.LoopPredecessors) {
+    if (LoopChain.UnscheduledPredecessors) {
       BadLoop = true;
       dbgs() << "Loop chain contains a block without its preds placed!\n"
             << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
@@ -1129,13 +1395,42 @@
     }
     assert(!BadLoop && "Detected problems with the placement of this loop.");
   });
+
+  BlockWorkList.clear();
+  EHPadWorkList.clear();
+}
+
+/// When OutlineOptionalBranches is on, this method collects BBs that
+/// dominate all terminator blocks of the function \p F.
+void MachineBlockPlacement::collectMustExecuteBBs() {
+  if (OutlineOptionalBranches) {
+    // Find the nearest common dominator of all of F's terminators.
+    MachineBasicBlock *Terminator = nullptr;
+    for (MachineBasicBlock &MBB : *F) {
+      if (MBB.succ_size() == 0) {
+        if (Terminator == nullptr)
+          Terminator = &MBB;
+        else
+          Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
+      }
+    }
+
+    // MBBs dominating this common dominator are unavoidable.
+    UnavoidableBlocks.clear();
+    for (MachineBasicBlock &MBB : *F) {
+      if (MDT->dominates(&MBB, Terminator)) {
+        UnavoidableBlocks.insert(&MBB);
+      }
+    }
+  }
 }
 
-void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
+void MachineBlockPlacement::buildCFGChains() {
   // Ensure that every BB in the function has an associated chain to simplify
   // the assumptions of the remaining algorithm.
   SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
-  for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+  for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;
+       ++FI) {
     MachineBasicBlock *BB = &*FI;
     BlockChain *Chain =
         new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
@@ -1144,7 +1439,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
   for (;;) {
     Cond.clear();
     MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
- if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) + if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; MachineFunction::iterator NextFI = std::next(FI); @@ -1161,55 +1456,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } } - if (OutlineOptionalBranches) { - // Find the nearest common dominator of all of F's terminators. - MachineBasicBlock *Terminator = nullptr; - for (MachineBasicBlock &MBB : F) { - if (MBB.succ_size() == 0) { - if (Terminator == nullptr) - Terminator = &MBB; - else - Terminator = MDT->findNearestCommonDominator(Terminator, &MBB); - } - } - - // MBBs dominating this common dominator are unavoidable. - UnavoidableBlocks.clear(); - for (MachineBasicBlock &MBB : F) { - if (MDT->dominates(&MBB, Terminator)) { - UnavoidableBlocks.insert(&MBB); - } - } - } + // Turned on with OutlineOptionalBranches option + collectMustExecuteBBs(); // Build any loop-based chains. for (MachineLoop *L : *MLI) - buildLoopChains(F, *L); + buildLoopChains(*L); - SmallVector<MachineBasicBlock *, 16> BlockWorkList; + assert(BlockWorkList.empty()); + assert(EHPadWorkList.empty()); SmallPtrSet<BlockChain *, 4> UpdatedPreds; - for (MachineBasicBlock &MBB : F) { - BlockChain &Chain = *BlockToChain[&MBB]; - if (!UpdatedPreds.insert(&Chain).second) - continue; - - assert(Chain.LoopPredecessors == 0); - for (MachineBasicBlock *ChainBB : Chain) { - assert(BlockToChain[ChainBB] == &Chain); - for (MachineBasicBlock *Pred : ChainBB->predecessors()) { - if (BlockToChain[Pred] == &Chain) - continue; - ++Chain.LoopPredecessors; - } - } - - if (Chain.LoopPredecessors == 0) - BlockWorkList.push_back(*Chain.begin()); - } + for (MachineBasicBlock &MBB : *F) + fillWorkLists(&MBB, UpdatedPreds); - BlockChain &FunctionChain = *BlockToChain[&F.front()]; - buildChain(&F.front(), FunctionChain, BlockWorkList); + BlockChain &FunctionChain = *BlockToChain[&F->front()]; + buildChain(&F->front(), FunctionChain); #ifndef NDEBUG typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; @@ -1218,7 +1480,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Crash at the end so we get all of the debugging output first. bool BadFunc = false; FunctionBlockSetType FunctionBlockSet; - for (MachineBasicBlock &MBB : F) + for (MachineBasicBlock &MBB : *F) FunctionBlockSet.insert(&MBB); for (MachineBasicBlock *ChainBB : FunctionChain) @@ -1238,13 +1500,14 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { }); // Splice the blocks into place. - MachineFunction::iterator InsertPos = F.begin(); + MachineFunction::iterator InsertPos = F->begin(); + DEBUG(dbgs() << "[MBP] Function: "<< F->getName() << "\n"); for (MachineBasicBlock *ChainBB : FunctionChain) { DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain " : " ... ") << getBlockName(ChainBB) << "\n"); if (InsertPos != MachineFunction::iterator(ChainBB)) - F.splice(InsertPos, ChainBB); + F->splice(InsertPos, ChainBB); else ++InsertPos; @@ -1258,69 +1521,90 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // boiler plate. Cond.clear(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { - // The "PrevBB" is not yet updated to reflect current code layout, so, - // o. it may fall-through to a block without explict "goto" instruction - // before layout, and no longer fall-through it after layout; or - // o. just opposite. 
- // - // AnalyzeBranch() may return erroneous value for FBB when these two - // situations take place. For the first scenario FBB is mistakenly set - // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, - // is mistakenly pointing to "*BI". - // - bool needUpdateBr = true; - if (!Cond.empty() && (!FBB || FBB == ChainBB)) { - PrevBB->updateTerminator(); - needUpdateBr = false; - Cond.clear(); - TBB = FBB = nullptr; - if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { - // FIXME: This should never take place. - TBB = FBB = nullptr; - } - } + // The "PrevBB" is not yet updated to reflect current code layout, so, + // o. it may fall-through to a block without explicit "goto" instruction + // before layout, and no longer fall-through it after layout; or + // o. just opposite. + // + // analyzeBranch() may return erroneous value for FBB when these two + // situations take place. For the first scenario FBB is mistakenly set NULL; + // for the 2nd scenario, the FBB, which is expected to be NULL, is + // mistakenly pointing to "*BI". + // Thus, if the future change needs to use FBB before the layout is set, it + // has to correct FBB first by using the code similar to the following: + // + // if (!Cond.empty() && (!FBB || FBB == ChainBB)) { + // PrevBB->updateTerminator(); + // Cond.clear(); + // TBB = FBB = nullptr; + // if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // // FIXME: This should never take place. + // TBB = FBB = nullptr; + // } + // } + if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) + PrevBB->updateTerminator(); + } + + // Fixup the last block. + Cond.clear(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. + if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond)) + F->back().updateTerminator(); + + BlockWorkList.clear(); + EHPadWorkList.clear(); +} + +void MachineBlockPlacement::optimizeBranches() { + BlockChain &FunctionChain = *BlockToChain[&F->front()]; + SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. + + // Now that all the basic blocks in the chain have the proper layout, + // make a final call to AnalyzeBranch with AllowModify set. + // Indeed, the target may be able to optimize the branches in a way we + // cannot because all branches may not be analyzable. + // E.g., the target may be able to remove an unconditional branch to + // a fallthrough when it occurs after predicated terminators. + for (MachineBasicBlock *ChainBB : FunctionChain) { + Cond.clear(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. + if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) { // If PrevBB has a two-way branch, try to re-order the branches // such that we branch to the successor with higher probability first. 
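The reordering that follows is easiest to see with the probabilities written out. A toy model, assuming plain doubles for the two edge probabilities where the real code compares BranchProbability values:

#include <utility>

struct TwoWayBranch {
  int TakenTarget;       // TBB: taken when the condition holds
  int FallthroughTarget; // FBB: taken on the negated condition
  bool Negated = false;
};

// If the "false" side is likelier and the target can reverse the condition,
// swap the targets so the hot edge is tested first.
static void preferLikelySuccessor(TwoWayBranch &B, double ProbTBB,
                                  double ProbFBB, bool ConditionReversible) {
  if (ProbFBB > ProbTBB && ConditionReversible) {
    std::swap(B.TakenTarget, B.FallthroughTarget);
    B.Negated = !B.Negated; // reversing the condition swaps the two edges
  }
}

With ProbFBB = 0.7 against ProbTBB = 0.3 the swap fires and the 70% edge becomes the first branch, which is what the DEBUG output in the code below reports.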
if (TBB && !Cond.empty() && FBB && - MBPI->getEdgeProbability(PrevBB, FBB) > - MBPI->getEdgeProbability(PrevBB, TBB) && + MBPI->getEdgeProbability(ChainBB, FBB) > + MBPI->getEdgeProbability(ChainBB, TBB) && !TII->ReverseBranchCondition(Cond)) { DEBUG(dbgs() << "Reverse order of the two branches: " - << getBlockName(PrevBB) << "\n"); + << getBlockName(ChainBB) << "\n"); DEBUG(dbgs() << " Edge probability: " - << MBPI->getEdgeProbability(PrevBB, FBB) << " vs " - << MBPI->getEdgeProbability(PrevBB, TBB) << "\n"); + << MBPI->getEdgeProbability(ChainBB, FBB) << " vs " + << MBPI->getEdgeProbability(ChainBB, TBB) << "\n"); DebugLoc dl; // FIXME: this is nowhere - TII->RemoveBranch(*PrevBB); - TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); - needUpdateBr = true; + TII->RemoveBranch(*ChainBB); + TII->InsertBranch(*ChainBB, FBB, TBB, Cond, dl); + ChainBB->updateTerminator(); } - if (needUpdateBr) - PrevBB->updateTerminator(); } } +} - // Fixup the last block. - Cond.clear(); - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) - F.back().updateTerminator(); - +void MachineBlockPlacement::alignBlocks() { // Walk through the backedges of the function now that we have fully laid out // the basic blocks and align the destination of each backedge. We don't rely // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - // FIXME: Use Function::optForSize(). - if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) + if (F->getFunction()->optForSize()) return; + BlockChain &FunctionChain = *BlockToChain[&F->front()]; if (FunctionChain.begin() == FunctionChain.end()) return; // Empty chain. const BranchProbability ColdProb(1, 5); // 20% - BlockFrequency EntryFreq = MBFI->getBlockFreq(&F.front()); + BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; for (MachineBasicBlock *ChainBB : FunctionChain) { if (ChainBB == *FunctionChain.begin()) @@ -1334,11 +1618,6 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!L) continue; - if (AlignAllLoops) { - ChainBB->setAlignment(AlignAllLoops); - continue; - } - unsigned Align = TLI->getPrefLoopAlignment(L); if (!Align) continue; // Don't care about loop alignment. @@ -1380,31 +1659,67 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } } -bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { - // Check for single-block functions and skip them. - if (std::next(F.begin()) == F.end()) +bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) return false; - if (skipOptnoneFunction(*F.getFunction())) + // Check for single-block functions and skip them. 
+ if (std::next(MF.begin()) == MF.end()) return false; + F = &MF; MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>( + getAnalysis<MachineBlockFrequencyInfo>()); MLI = &getAnalysis<MachineLoopInfo>(); - TII = F.getSubtarget().getInstrInfo(); - TLI = F.getSubtarget().getTargetLowering(); + TII = MF.getSubtarget().getInstrInfo(); + TLI = MF.getSubtarget().getTargetLowering(); MDT = &getAnalysis<MachineDominatorTree>(); assert(BlockToChain.empty()); - buildCFGChains(F); + buildCFGChains(); + + // Changing the layout can create new tail merging opportunities. + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + // TailMerge can create jump into if branches that make CFG irreducible for + // HW that requires structured CFG. + bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && + PassConfig->getEnableTailMerge() && + BranchFoldPlacement; + // No tail merging opportunities if the block number is less than four. + if (MF.size() > 3 && EnableTailMerge) { + BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, + *MBPI); + + if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>(), MLI, + /*AfterBlockPlacement=*/true)) { + // Redo the layout if tail merging creates/removes/moves blocks. + BlockToChain.clear(); + ChainAllocator.DestroyAll(); + buildCFGChains(); + } + } + + optimizeBranches(); + alignBlocks(); BlockToChain.clear(); ChainAllocator.DestroyAll(); if (AlignAllBlock) // Align all of the blocks in the function to a specific alignment. - for (MachineBasicBlock &MBB : F) + for (MachineBasicBlock &MBB : MF) MBB.setAlignment(AlignAllBlock); + else if (AlignAllNonFallThruBlocks) { + // Align all of the blocks that have no fall-through predecessors to a + // specific alignment. + for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) { + auto LayoutPred = std::prev(MBI); + if (!LayoutPred->isSuccessor(&*MBI)) + MBI->setAlignment(AlignAllNonFallThruBlocks); + } + } // We always return true as we have no way to track whether the final order // differs from the original order. 
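The AlignAllNonFallThruBlocks loop at the end of runOnMachineFunction needs only the layout order and the successor lists. The same shape over a toy block type, where isSuccessor is a stand-in for MachineBasicBlock::isSuccessor:

#include <vector>

struct Block {
  std::vector<Block *> Succs;
  unsigned Alignment = 0;
  bool isSuccessor(const Block *B) const {
    for (const Block *S : Succs)
      if (S == B)
        return true;
    return false;
  }
};

// Align every block that its layout predecessor cannot fall through into;
// the entry block is skipped since it has no layout predecessor.
static void alignNonFallThruBlocks(std::vector<Block> &F, unsigned Align) {
  for (size_t I = 1; I < F.size(); ++I)
    if (!F[I - 1].isSuccessor(&F[I]))
      F[I].Alignment = Align;
}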
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index cf6d401..fe73406 100644 --- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -24,9 +24,21 @@ INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob", INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", "Machine Branch Probability Analysis", false, true) +cl::opt<unsigned> + StaticLikelyProb("static-likely-prob", + cl::desc("branch probability threshold in percentage " + "to be considered very likely"), + cl::init(80), cl::Hidden); + +cl::opt<unsigned> ProfileLikelyProb( + "profile-likely-prob", + cl::desc("branch probability threshold in percentage to be considered" + " very likely when profile is available"), + cl::init(51), cl::Hidden); + char MachineBranchProbabilityInfo::ID = 0; -void MachineBranchProbabilityInfo::anchor() { } +void MachineBranchProbabilityInfo::anchor() {} BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( const MachineBasicBlock *Src, @@ -42,11 +54,9 @@ BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( std::find(Src->succ_begin(), Src->succ_end(), Dst)); } -bool -MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { - // Hot probability is at least 4/5 = 80% - static BranchProbability HotProb(4, 5); +bool MachineBranchProbabilityInfo::isEdgeHot( + const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { + BranchProbability HotProb(StaticLikelyProb, 100); return getEdgeProbability(Src, Dst) > HotProb; } @@ -63,7 +73,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { } } - static BranchProbability HotProb(4, 5); + BranchProbability HotProb(StaticLikelyProb, 100); if (getEdgeProbability(MBB, MaxSucc) >= HotProb) return MaxSucc; diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index aad376c..1209f73 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -352,6 +352,12 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) { // This is a trivial form of alias analysis. return false; } + + // Ignore stack guard loads, otherwise the register that holds the CSEed value may + // be spilled and get loaded back with corrupted data. + if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) + return false; + return true; } @@ -383,7 +389,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. - if (TII->isAsCheapAsAMove(MI)) { + if (TII->isAsCheapAsAMove(*MI)) { MachineBasicBlock *CSBB = CSMI->getParent(); MachineBasicBlock *BB = MI->getParent(); if (CSBB != BB && !CSBB->isSuccessor(BB)) @@ -472,8 +478,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Commute commutable instructions.
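The isEdgeHot change above swaps the hard-coded 4/5 for the static-likely-prob threshold. The comparison it performs, modeled with plain integers instead of BranchProbability objects:

// Returns true when Num/Denom exceeds StaticLikelyProb percent, e.g.
// 9/10 against the default of 80: 9 * 100 > 80 * 10, so the edge is hot.
static bool isEdgeHotModel(unsigned Num, unsigned Denom,
                           unsigned StaticLikelyProb = 80) {
  return 100ull * Num > 1ull * StaticLikelyProb * Denom;
}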
bool Commuted = false; if (!FoundCSE && MI->isCommutable()) { - MachineInstr *NewMI = TII->commuteInstruction(MI); - if (NewMI) { + if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) { Commuted = true; FoundCSE = VNT.count(NewMI); if (NewMI != MI) { @@ -482,7 +487,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { Changed = true; } else if (!FoundCSE) // MI was changed but it didn't help, commute it back! - (void)TII->commuteInstruction(MI); + (void)TII->commuteInstruction(*MI); } } @@ -698,7 +703,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { } bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { - if (skipOptnoneFunction(*MF.getFunction())) + if (skipFunction(*MF.getFunction())) return false; TII = MF.getSubtarget().getInstrInfo(); diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp index fa43c4d..6b5c6ba 100644 --- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -13,8 +13,8 @@ #define DEBUG_TYPE "machine-combiner" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -41,6 +40,7 @@ class MachineCombiner : public MachineFunctionPass { const TargetRegisterInfo *TRI; MCSchedModel SchedModel; MachineRegisterInfo *MRI; + MachineLoopInfo *MLI; // Current MachineLoopInfo MachineTraceMetrics *Traces; MachineTraceMetrics::Ensemble *MinInstr; @@ -87,6 +87,7 @@ char &llvm::MachineCombinerID = MachineCombiner::ID; INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", "Machine InstCombiner", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", false, false) @@ -94,6 +95,7 @@ INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); AU.addRequired<MachineTraceMetrics>(); AU.addPreserved<MachineTraceMetrics>(); @@ -156,7 +158,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, } else { MachineInstr *DefInstr = getOperandDef(MO); if (DefInstr) { - DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth; + DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth; LatencyOp = TSchedModel.computeOperandLatency( DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); @@ -198,7 +200,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, RI++; MachineInstr *UseMO = RI->getParent(); unsigned LatencyOp = 0; - if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) { + if (UseMO && BlockTrace.isDepInTrace(*Root, *UseMO)) { LatencyOp = TSchedModel.computeOperandLatency( NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, UseMO->findRegisterUseOperandIdx(MO.getReg())); @@ -250,7 +252,7 @@ bool MachineCombiner::improvesCriticalPathLen( // Get 
depth and latency of NewRoot and Root. unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); - unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; + unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth; DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; @@ -269,7 +271,7 @@ bool MachineCombiner::improvesCriticalPathLen( // even if the instruction depths (data dependency cycles) become worse. unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); unsigned RootLatency = TSchedModel.computeInstrLatency(Root); - unsigned RootSlack = BlockTrace.getInstrSlack(Root); + unsigned RootSlack = BlockTrace.getInstrSlack(*Root); DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; dbgs() << " RootLatency: " << RootLatency << "\n"; @@ -281,7 +283,7 @@ bool MachineCombiner::improvesCriticalPathLen( unsigned NewCycleCount = NewRootDepth + NewRootLatency; unsigned OldCycleCount = RootDepth + RootLatency + RootSlack; - + return NewCycleCount <= OldCycleCount; } @@ -355,6 +357,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); auto BlockIter = MBB->begin(); + // Check if the block is in a loop. + const MachineLoop *ML = MLI->getLoopFor(MBB); while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; @@ -407,11 +411,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { if (!NewInstCount) continue; + bool SubstituteAlways = false; + if (ML && TII->isThroughputPattern(P)) + SubstituteAlways = true; + // Substitute when we optimize for codesize and the new sequence has // fewer instructions OR // the new sequence neither lengthens the critical path nor increases // resource pressure. 
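The acceptance test in improvesCriticalPathLen comes down to a single comparison between the old and new cycle counts. A sketch with the five quantities as plain unsigneds; in the real code they come from MachineTraceMetrics and the target scheduling model:

static bool improvesCriticalPathLenModel(unsigned NewRootDepth,
                                         unsigned NewRootLatency,
                                         unsigned RootDepth,
                                         unsigned RootLatency,
                                         unsigned RootSlack) {
  unsigned NewCycleCount = NewRootDepth + NewRootLatency;
  unsigned OldCycleCount = RootDepth + RootLatency + RootSlack;
  // Keep the combined sequence when it does not lengthen the critical path.
  return NewCycleCount <= OldCycleCount;
}

For example, NewRootDepth 4 and NewRootLatency 3 against RootDepth 3, RootLatency 3 and RootSlack 2 gives 7 <= 8, so the rewrite is kept even though the new root sits deeper in the trace.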
- if (doSubstitute(NewInstCount, OldInstCount) || + if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) || (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, InstrIdxForVirtReg, P) && preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { @@ -448,6 +456,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { SchedModel = STI.getSchedModel(); TSchedModel.init(SchedModel, &STI, TII); MRI = &MF.getRegInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); MinInstr = nullptr; OptSize = MF.getFunction()->optForSize(); diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index a686341..8fdf39d 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,27 +32,47 @@ using namespace llvm; STATISTIC(NumDeletes, "Number of dead copies deleted"); namespace { + typedef SmallVector<unsigned, 4> RegList; + typedef DenseMap<unsigned, RegList> SourceMap; + typedef DenseMap<unsigned, MachineInstr*> Reg2MIMap; + class MachineCopyPropagation : public MachineFunctionPass { const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; - MachineRegisterInfo *MRI; + const MachineRegisterInfo *MRI; public: static char ID; // Pass identification, replacement for typeid MachineCopyPropagation() : MachineFunctionPass(ID) { - initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); + initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) override; - private: - typedef SmallVector<unsigned, 4> DestList; - typedef DenseMap<unsigned, DestList> SourceMap; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } - void SourceNoLongerAvailable(unsigned Reg, - SourceMap &SrcMap, - DenseMap<unsigned, MachineInstr*> &AvailCopyMap); - bool CopyPropagateBlock(MachineBasicBlock &MBB); + private: + void ClobberRegister(unsigned Reg); + void CopyPropagateBlock(MachineBasicBlock &MBB); + bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); + + /// Candidates for deletion. + SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; + /// Def -> available copies map. + Reg2MIMap AvailCopyMap; + /// Def -> copies map. + Reg2MIMap CopyMap; + /// Src -> Def map + SourceMap SrcMap; + bool Changed; }; } char MachineCopyPropagation::ID = 0; @@ -62,79 +81,105 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", "Machine Copy Propagation Pass", false, false) -void -MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, - SourceMap &SrcMap, - DenseMap<unsigned, MachineInstr*> &AvailCopyMap) { +/// Remove any entry in \p Map where the register is a subregister or equal to +/// a register contained in \p Regs. 
+static void removeRegsFromMap(Reg2MIMap &Map, const RegList &Regs, + const TargetRegisterInfo &TRI) { + for (unsigned Reg : Regs) { + // Source of copy is no longer available for propagation. + for (MCSubRegIterator SR(Reg, &TRI, true); SR.isValid(); ++SR) + Map.erase(*SR); + } +} + +/// Remove any entry in \p Map that is marked clobbered in \p RegMask. +/// The map will typically have a lot fewer entries than the regmask clobbers, +/// so this is more efficient than iterating the clobbered registers and calling +/// ClobberRegister() on them. +static void removeClobberedRegsFromMap(Reg2MIMap &Map, + const MachineOperand &RegMask) { + for (Reg2MIMap::iterator I = Map.begin(), E = Map.end(), Next; I != E; + I = Next) { + Next = std::next(I); + unsigned Reg = I->first; + if (RegMask.clobbersPhysReg(Reg)) + Map.erase(I); + } +} + +void MachineCopyPropagation::ClobberRegister(unsigned Reg) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + CopyMap.erase(*AI); + AvailCopyMap.erase(*AI); + SourceMap::iterator SI = SrcMap.find(*AI); if (SI != SrcMap.end()) { - const DestList& Defs = SI->second; - for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); - I != E; ++I) { - unsigned MappedDef = *I; - // Source of copy is no longer available for propagation. - AvailCopyMap.erase(MappedDef); - for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR) - AvailCopyMap.erase(*SR); - } + removeRegsFromMap(AvailCopyMap, SI->second, *TRI); + SrcMap.erase(SI); } } } -static bool NoInterveningSideEffect(const MachineInstr *CopyMI, - const MachineInstr *MI) { - const MachineBasicBlock *MBB = CopyMI->getParent(); - if (MI->getParent() != MBB) - return false; - MachineBasicBlock::const_iterator I = CopyMI; - MachineBasicBlock::const_iterator E = MBB->end(); - MachineBasicBlock::const_iterator E2 = MI; - - ++I; - while (I != E && I != E2) { - if (I->hasUnmodeledSideEffects() || I->isCall() || - I->isTerminator()) - return false; - ++I; +/// Return true if \p PreviousCopy did copy register \p Src to register \p Def. +/// This fact may have been obscured by sub register usage or may not be true at +/// all even though Src and Def are subregisters of the registers used in +/// PreviousCopy. e.g. +/// isNopCopy("ecx = COPY eax", AX, CX) == true +/// isNopCopy("ecx = COPY eax", AH, CL) == false +static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src, + unsigned Def, const TargetRegisterInfo *TRI) { + unsigned PreviousSrc = PreviousCopy.getOperand(1).getReg(); + unsigned PreviousDef = PreviousCopy.getOperand(0).getReg(); + if (Src == PreviousSrc) { + assert(Def == PreviousDef); + return true; } - return true; + if (!TRI->isSubRegister(PreviousSrc, Src)) + return false; + unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src); + return SubIdx == TRI->getSubRegIndex(PreviousDef, Def); } -/// isNopCopy - Return true if the specified copy is really a nop. That is -/// if the source of the copy is the same of the definition of the copy that -/// supplied the source. If the source of the copy is a sub-register than it -/// must check the sub-indices match. e.g. 
-/// ecx = mov eax -/// al = mov cl -/// But not -/// ecx = mov eax -/// al = mov ch -static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, - const TargetRegisterInfo *TRI) { - unsigned SrcSrc = CopyMI->getOperand(1).getReg(); - if (Def == SrcSrc) - return true; - if (TRI->isSubRegister(SrcSrc, Def)) { - unsigned SrcDef = CopyMI->getOperand(0).getReg(); - unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def); - if (!SubIdx) - return false; - return SubIdx == TRI->getSubRegIndex(SrcDef, Src); - } +/// Remove instruction \p Copy if there exists a previous copy that copies the +/// register \p Src to the register \p Def; This may happen indirectly by +/// copying the super registers. +bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, + unsigned Def) { + // Avoid eliminating a copy from/to a reserved registers as we cannot predict + // the value (Example: The sparc zero register is writable but stays zero). + if (MRI->isReserved(Src) || MRI->isReserved(Def)) + return false; - return false; -} + // Search for an existing copy. + Reg2MIMap::iterator CI = AvailCopyMap.find(Def); + if (CI == AvailCopyMap.end()) + return false; + + // Check that the existing copy uses the correct sub registers. + MachineInstr &PrevCopy = *CI->second; + if (!isNopCopy(PrevCopy, Src, Def, TRI)) + return false; -bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { - SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion - DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map - DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map - SourceMap SrcMap; // Src -> Def map + DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; Copy.dump()); + // Copy was redundantly redefining either Src or Def. Remove earlier kill + // flags between Copy and PrevCopy because the value will be reused now. + assert(Copy.isCopy()); + unsigned CopyDef = Copy.getOperand(0).getReg(); + assert(CopyDef == Src || CopyDef == Def); + for (MachineInstr &MI : + make_range(PrevCopy.getIterator(), Copy.getIterator())) + MI.clearRegisterKills(CopyDef, TRI); + + Copy.eraseFromParent(); + Changed = true; + ++NumDeletes; + return true; +} + +void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); - bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -143,48 +188,32 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { unsigned Def = MI->getOperand(0).getReg(); unsigned Src = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Def) || - TargetRegisterInfo::isVirtualRegister(Src)) - report_fatal_error("MachineCopyPropagation should be run after" - " register allocation!"); - - DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src); - if (CI != AvailCopyMap.end()) { - MachineInstr *CopyMI = CI->second; - if (!MRI->isReserved(Def) && - (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) && - isNopCopy(CopyMI, Def, Src, TRI)) { - // The two copies cancel out and the source of the first copy - // hasn't been overridden, eliminate the second one. e.g. - // %ECX<def> = COPY %EAX<kill> - // ... nothing clobbered EAX. - // %EAX<def> = COPY %ECX - // => - // %ECX<def> = COPY %EAX - // - // Also avoid eliminating a copy from reserved registers unless the - // definition is proven not clobbered. e.g. 
- // %RSP<def> = COPY %RAX - // CALL - // %RAX<def> = COPY %RSP - - DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; MI->dump()); - - // Clear any kills of Def between CopyMI and MI. This extends the - // live range. - for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) - I->clearRegisterKills(Def, TRI); - - MI->eraseFromParent(); - Changed = true; - ++NumDeletes; - continue; - } - } + assert(!TargetRegisterInfo::isVirtualRegister(Def) && + !TargetRegisterInfo::isVirtualRegister(Src) && + "MachineCopyPropagation should be run after register allocation!"); + + // The two copies cancel out and the source of the first copy + // hasn't been overridden, eliminate the second one. e.g. + // %ECX<def> = COPY %EAX + // ... nothing clobbered EAX. + // %EAX<def> = COPY %ECX + // => + // %ECX<def> = COPY %EAX + // + // or + // + // %ECX<def> = COPY %EAX + // ... nothing clobbered EAX. + // %ECX<def> = COPY %EAX + // => + // %ECX<def> = COPY %EAX + if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def)) + continue; - // If Src is defined by a previous copy, it cannot be eliminated. + // If Src is defined by a previous copy, the previous copy cannot be + // eliminated. for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) { - CI = CopyMap.find(*AI); + Reg2MIMap::iterator CI = CopyMap.find(*AI); if (CI != CopyMap.end()) { DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump()); MaybeDeadCopies.remove(CI->second); @@ -194,23 +223,19 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); // Copy is now a candidate for deletion. - MaybeDeadCopies.insert(MI); + if (!MRI->isReserved(Def)) + MaybeDeadCopies.insert(MI); - // If 'Src' is previously source of another copy, then this earlier copy's + // If 'Def' is previously source of another copy, then this earlier copy's // source is no longer available. e.g. // %xmm9<def> = copy %xmm2 // ... // %xmm2<def> = copy %xmm0 // ... // %xmm2<def> = copy %xmm9 - SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap); + ClobberRegister(Def); // Remember Def is defined by the copy. - // ... Make sure to clear the def maps of aliases first. - for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) { - CopyMap.erase(*AI); - AvailCopyMap.erase(*AI); - } for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid(); ++SR) { CopyMap[*SR] = MI; @@ -219,30 +244,27 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Remember source that's copied to Def. Once it's clobbered, then // it's no longer available for copy propagation. - if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) == - SrcMap[Src].end()) { - SrcMap[Src].push_back(Def); - } + RegList &DestList = SrcMap[Src]; + if (std::find(DestList.begin(), DestList.end(), Def) == DestList.end()) + DestList.push_back(Def); continue; } // Not a copy. 
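The new isNopCopy above answers one question: does an earlier "PrevDef = COPY PrevSrc" already cover a later "Def = COPY Src"? A standalone model, where sub-register indices are plain ints and subRegIndexOf is an assumed stand-in for TRI->getSubRegIndex, with a negative result meaning "not a sub-register":

static bool isNopCopyModel(unsigned PrevSrc, unsigned PrevDef, unsigned Src,
                           unsigned Def,
                           int (*subRegIndexOf)(unsigned Super, unsigned Sub)) {
  // Exact match: ecx = COPY eax covers a later eax = COPY ecx.
  if (Src == PrevSrc)
    return Def == PrevDef;
  // Sub-register match: the indices must line up on both sides, so
  // (AX, CX) inside (EAX, ECX) is a nop while (AH, CL) is not.
  int SrcIdx = subRegIndexOf(PrevSrc, Src);
  return SrcIdx >= 0 && SrcIdx == subRegIndexOf(PrevDef, Def);
}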
SmallVector<unsigned, 2> Defs; - int RegMaskOpNum = -1; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + const MachineOperand *RegMask = nullptr; + for (const MachineOperand &MO : MI->operands()) { if (MO.isRegMask()) - RegMaskOpNum = i; + RegMask = &MO; if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) - report_fatal_error("MachineCopyPropagation should be run after" - " register allocation!"); + assert(!TargetRegisterInfo::isVirtualRegister(Reg) && + "MachineCopyPropagation should be run after register allocation!"); if (MO.isDef()) { Defs.push_back(Reg); @@ -252,7 +274,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // If 'Reg' is defined by a copy, the copy is no longer a candidate // for elimination. for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI); + Reg2MIMap::iterator CI = CopyMap.find(*AI); if (CI != CopyMap.end()) { DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); MaybeDeadCopies.remove(CI->second); @@ -269,78 +291,81 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { } // The instruction has a register mask operand which means that it clobbers - // a large set of registers. It is possible to use the register mask to - // prune the available copies, but treat it like a basic block boundary for - // now. - if (RegMaskOpNum >= 0) { + // a large set of registers. Treat clobbered registers the same way as + // defined registers. + if (RegMask) { // Erase any MaybeDeadCopies whose destination register is clobbered. - const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum); - for (SmallSetVector<MachineInstr*, 8>::iterator - DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); - DI != DE; ++DI) { - unsigned Reg = (*DI)->getOperand(0).getReg(); - if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg)) + for (SmallSetVector<MachineInstr *, 8>::iterator DI = + MaybeDeadCopies.begin(); + DI != MaybeDeadCopies.end();) { + MachineInstr *MaybeDead = *DI; + unsigned Reg = MaybeDead->getOperand(0).getReg(); + assert(!MRI->isReserved(Reg)); + + if (!RegMask->clobbersPhysReg(Reg)) { + ++DI; continue; + } + DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; - (*DI)->dump()); - (*DI)->eraseFromParent(); + MaybeDead->dump()); + + // erase() will return the next valid iterator pointing to the next + // element after the erased one. + DI = MaybeDeadCopies.erase(DI); + MaybeDead->eraseFromParent(); Changed = true; ++NumDeletes; } - // Clear all data structures as if we were beginning a new basic block. - MaybeDeadCopies.clear(); - AvailCopyMap.clear(); - CopyMap.clear(); - SrcMap.clear(); - continue; - } - - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Reg = Defs[i]; - - // No longer defined by a copy. - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - CopyMap.erase(*AI); - AvailCopyMap.erase(*AI); + removeClobberedRegsFromMap(AvailCopyMap, *RegMask); + removeClobberedRegsFromMap(CopyMap, *RegMask); + for (SourceMap::iterator I = SrcMap.begin(), E = SrcMap.end(), Next; + I != E; I = Next) { + Next = std::next(I); + if (RegMask->clobbersPhysReg(I->first)) { + removeRegsFromMap(AvailCopyMap, I->second, *TRI); + SrcMap.erase(I); + } } - - // If 'Reg' is previously source of a copy, it is no longer available for - // copy propagation. 
- SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap); } + + // Any previous copy definition or reading the Defs is no longer available. + for (unsigned Reg : Defs) + ClobberRegister(Reg); } // If MBB doesn't have successors, delete the copies whose defs are not used. // If MBB does have successors, then conservative assume the defs are live-out // since we don't want to trust live-in lists. if (MBB.succ_empty()) { - for (SmallSetVector<MachineInstr*, 8>::iterator - DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); - DI != DE; ++DI) { - if (!MRI->isReserved((*DI)->getOperand(0).getReg())) { - (*DI)->eraseFromParent(); - Changed = true; - ++NumDeletes; - } + for (MachineInstr *MaybeDead : MaybeDeadCopies) { + assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg())); + MaybeDead->eraseFromParent(); + Changed = true; + ++NumDeletes; } } - return Changed; + MaybeDeadCopies.clear(); + AvailCopyMap.clear(); + CopyMap.clear(); + SrcMap.clear(); } bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { - if (skipOptnoneFunction(*MF.getFunction())) + if (skipFunction(*MF.getFunction())) return false; - bool Changed = false; + Changed = false; TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); MRI = &MF.getRegInfo(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= CopyPropagateBlock(*I); + for (MachineBasicBlock &MBB : MF) + CopyPropagateBlock(MBB); return Changed; } + diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp index 3f04bb0..303a6a9 100644 --- a/contrib/llvm/lib/CodeGen/MachineDominators.cpp +++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp @@ -15,9 +15,20 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +// Always verify dominfo if expensive checking is enabled. 
+#ifdef EXPENSIVE_CHECKS +static bool VerifyMachineDomInfo = true; +#else +static bool VerifyMachineDomInfo = false; +#endif +static cl::opt<bool, true> VerifyMachineDomInfoX( + "verify-machine-dom-info", cl::location(VerifyMachineDomInfo), + cl::desc("Verify machine dominator info (time consuming)")); + namespace llvm { template class DomTreeNodeBase<MachineBasicBlock>; template class DominatorTreeBase<MachineBasicBlock>; @@ -57,6 +68,11 @@ void MachineDominatorTree::releaseMemory() { DT->releaseMemory(); } +void MachineDominatorTree::verifyAnalysis() const { + if (VerifyMachineDomInfo) + verifyDomTree(); +} + void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { DT->print(OS); } @@ -125,3 +141,17 @@ void MachineDominatorTree::applySplitCriticalEdges() const { NewBBs.clear(); CriticalEdgesToSplit.clear(); } + +void MachineDominatorTree::verifyDomTree() const { + MachineFunction &F = *getRoot()->getParent(); + + MachineDominatorTree OtherDT; + OtherDT.DT->recalculate(F); + if (compare(OtherDT)) { + errs() << "MachineDominatorTree is not up to date!\nComputed:\n"; + print(errs(), nullptr); + errs() << "\nActual:\n"; + OtherDT.print(errs(), nullptr); + abort(); + } +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index f6604f3..a7c63ef 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -54,6 +54,30 @@ static cl::opt<unsigned> void MachineFunctionInitializer::anchor() {} +void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const { + // Leave this function even in NDEBUG as an out-of-line anchor. +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + for (BitVector::size_type i = 0; i < Properties.size(); ++i) { + bool HasProperty = Properties[i]; + if (OnlySet && !HasProperty) + continue; + switch(static_cast<Property>(i)) { + case Property::IsSSA: + ROS << (HasProperty ? "SSA, " : "Post SSA, "); + break; + case Property::TracksLiveness: + ROS << (HasProperty ? "" : "not ") << "tracking liveness, "; + break; + case Property::AllVRegsAllocated: + ROS << (HasProperty ? "AllVRegsAllocated" : "HasVRegs"); + break; + default: + break; + } + } +#endif +} + //===----------------------------------------------------------------------===// // MachineFunction implementation //===----------------------------------------------------------------------===// @@ -65,20 +89,34 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } +static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI, + const Function *Fn) { + if (Fn->hasFnAttribute(Attribute::StackAlignment)) + return Fn->getFnStackAlignment(); + return STI->getFrameLowering()->getStackAlignment(); +} + MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &mmi) : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), MMI(mmi) { + // Assume the function starts in SSA form with correct liveness. 
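The verifyDomTree helper added above is the usual recompute-and-compare verifier. The shape, stripped of LLVM's types; recalculate and operator== are assumed members of whatever analysis result is being checked:

#include <cstdio>
#include <cstdlib>

template <typename AnalysisT, typename FunctionT>
void verifyByRecompute(const AnalysisT &Maintained, const FunctionT &F) {
  AnalysisT Fresh;
  Fresh.recalculate(F); // rebuild the result from scratch
  if (!(Maintained == Fresh)) {
    std::fprintf(stderr, "incrementally updated analysis is stale!\n");
    std::abort(); // fail loudly, as verifyDomTree does
  }
}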
+ Properties.set(MachineFunctionProperties::Property::IsSSA); + Properties.set(MachineFunctionProperties::Property::TracksLiveness); if (STI->getRegisterInfo()) RegInfo = new (Allocator) MachineRegisterInfo(this); else RegInfo = nullptr; MFInfo = nullptr; - FrameInfo = new (Allocator) - MachineFrameInfo(STI->getFrameLowering()->getStackAlignment(), - STI->getFrameLowering()->isStackRealignable(), - !F->hasFnAttribute("no-realign-stack")); + // We can realign the stack if the target supports it and the user hasn't + // explicitly asked us not to. + bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() && + !F->hasFnAttribute("no-realign-stack"); + FrameInfo = new (Allocator) MachineFrameInfo( + getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP, + /*ForceRealign=*/CanRealignSP && + F->hasFnAttribute(Attribute::StackAlignment)); if (Fn->hasFnAttribute(Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); @@ -209,9 +247,9 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { } /// Allocate a new MachineInstr. Use this instead of `new MachineInstr'. -MachineInstr * -MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID, - DebugLoc DL, bool NoImp) { +MachineInstr *MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID, + const DebugLoc &DL, + bool NoImp) { return new (InstructionRecycler.Allocate<MachineInstr>(Allocator)) MachineInstr(*this, MCID, DL, NoImp); } @@ -256,13 +294,11 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { BasicBlockRecycler.Deallocate(Allocator, MBB); } -MachineMemOperand * -MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, - uint64_t s, unsigned base_alignment, - const AAMDNodes &AAInfo, - const MDNode *Ranges) { - return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, - AAInfo, Ranges); +MachineMemOperand *MachineFunction::getMachineMemOperand( + MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, + unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges) { + return new (Allocator) + MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges); } MachineMemOperand * @@ -358,7 +394,7 @@ const char *MachineFunction::createExternalSymbolName(StringRef Name) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void MachineFunction::dump() const { +LLVM_DUMP_METHOD void MachineFunction::dump() const { print(dbgs()); } #endif @@ -368,14 +404,11 @@ StringRef MachineFunction::getName() const { return getFunction()->getName(); } -void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { +void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { OS << "# Machine code for function " << getName() << ": "; - if (RegInfo) { - OS << (RegInfo->isSSA() ? "SSA" : "Post SSA"); - if (!RegInfo->tracksLiveness()) - OS << ", not tracking liveness"; - } - OS << '\n'; + OS << "Properties: <"; + getProperties().print(OS); + OS << ">\n"; // Print Frame Information FrameInfo->print(*this, OS); @@ -523,7 +556,7 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { /// Make sure the function is at least Align bytes aligned. 
void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!StackRealignable || !RealignOption) + if (!StackRealignable) assert(Align <= StackAlignment && "For targets without stack realignment, Align is out of limit!"); if (MaxAlignment < Align) MaxAlignment = Align; @@ -545,8 +578,7 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = clampStackAlignment(!StackRealignable || !RealignOption, - Alignment, StackAlignment); + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, !isSS)); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -559,8 +591,7 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// returning a nonnegative identifier to represent it. int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = clampStackAlignment(!StackRealignable || !RealignOption, - Alignment, StackAlignment); + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -573,8 +604,7 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; - Alignment = clampStackAlignment(!StackRealignable || !RealignOption, - Alignment, StackAlignment); + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -590,10 +620,11 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // The alignment of the frame index can be determined from its offset from // the incoming frame position. If the frame object is at offset 32 and // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. - unsigned Align = MinAlign(SPOffset, StackAlignment); - Align = clampStackAlignment(!StackRealignable || !RealignOption, Align, - StackAlignment); + // object is 16-byte aligned. Note that unlike the non-fixed case, if the + // stack needs realignment, we can't assume that the stack will in fact be + // aligned. + unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*Alloca*/ nullptr, isAliased)); @@ -604,9 +635,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, /// Returns an index with a negative value. int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset) { - unsigned Align = MinAlign(SPOffset, StackAlignment); - Align = clampStackAlignment(!StackRealignable || !RealignOption, Align, - StackAlignment); + unsigned Align = MinAlign(SPOffset, ForcedRealign ? 
1 : StackAlignment); + Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, /*Immutable*/ true, /*isSS*/ true, @@ -819,7 +849,7 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void MachineJumpTableInfo::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void MachineJumpTableInfo::dump() const { print(dbgs()); } #endif @@ -852,6 +882,8 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { return SectionKind::getMergeableConst8(); case 16: return SectionKind::getMergeableConst16(); + case 32: + return SectionKind::getMergeableConst32(); default: return SectionKind::getReadOnly(); } @@ -895,17 +927,17 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // the constant folding APIs to do this so that we get the benefit of // DataLayout. if (isa<PointerType>(A->getType())) - A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant *>(A), DL); + A = ConstantFoldCastOperand(Instruction::PtrToInt, + const_cast<Constant *>(A), IntTy, DL); else if (A->getType() != IntTy) - A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant *>(A), DL); + A = ConstantFoldCastOperand(Instruction::BitCast, const_cast<Constant *>(A), + IntTy, DL); if (isa<PointerType>(B->getType())) - B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant *>(B), DL); + B = ConstantFoldCastOperand(Instruction::PtrToInt, + const_cast<Constant *>(B), IntTy, DL); else if (B->getType() != IntTy) - B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant *>(B), DL); + B = ConstantFoldCastOperand(Instruction::BitCast, const_cast<Constant *>(B), + IntTy, DL); return A == B; } @@ -966,5 +998,5 @@ void MachineConstantPool::print(raw_ostream &OS) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void MachineConstantPool::dump() const { print(dbgs()); } +LLVM_DUMP_METHOD void MachineConstantPool::dump() const { print(dbgs()); } #endif diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp index 05463fc..228fe17 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -21,11 +21,13 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" + using namespace llvm; Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, @@ -40,7 +42,26 @@ bool MachineFunctionPass::runOnFunction(Function &F) { return false; MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF(); - return runOnMachineFunction(MF); + MachineFunctionProperties &MFProps = MF.getProperties(); + +#ifndef NDEBUG + if (!MFProps.verifyRequiredProperties(RequiredProperties)) { + errs() << "MachineFunctionProperties required by " << getPassName() + << " pass are not met by function " << F.getName() << ".\n" + << "Required properties: "; + RequiredProperties.print(errs(), /*OnlySet=*/true); + errs() << "\nCurrent properties: "; + MFProps.print(errs()); + errs() << "\n"; + llvm_unreachable("MachineFunctionProperties check failed"); + } 
+#endif + + bool RV = runOnMachineFunction(MF); + + MFProps.set(SetProperties); + MFProps.clear(ClearedProperties); + return RV; } void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { @@ -53,13 +74,13 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { // because CodeGen overloads that to mean preserving the MachineBasicBlock // CFG in addition to the LLVM IR CFG. AU.addPreserved<BasicAAWrapperPass>(); - AU.addPreserved<DominanceFrontier>(); + AU.addPreserved<DominanceFrontierWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<AAResultsWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<IVUsers>(); + AU.addPreserved<IVUsersWrapperPass>(); AU.addPreserved<LoopInfoWrapperPass>(); - AU.addPreserved<MemoryDependenceAnalysis>(); + AU.addPreserved<MemoryDependenceWrapperPass>(); AU.addPreserved<ScalarEvolutionWrapperPass>(); AU.addPreserved<SCEVAAWrapperPass>(); AU.addPreserved<StackProtector>(); diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 6dca74d..3cdf8d2 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -372,10 +373,16 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, getCImm()->getValue().print(OS, false); break; case MachineOperand::MO_FPImmediate: - if (getFPImm()->getType()->isFloatTy()) + if (getFPImm()->getType()->isFloatTy()) { OS << getFPImm()->getValueAPF().convertToFloat(); - else + } else if (getFPImm()->getType()->isHalfTy()) { + APFloat APF = getFPImm()->getValueAPF(); + bool Unused; + APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &Unused); + OS << "half " << APF.convertToFloat(); + } else { OS << getFPImm()->getValueAPF().convertToDouble(); + } break; case MachineOperand::MO_MachineBasicBlock: OS << "<BB#" << getMBB()->getNumber() << ">"; @@ -490,13 +497,12 @@ MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, return MachinePointerInfo(MF.getPSVManager().getStack(), Offset); } -MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, +MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, uint64_t s, unsigned int a, const AAMDNodes &AAInfo, const MDNode *Ranges) - : PtrInfo(ptrinfo), Size(s), - Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), - AAInfo(AAInfo), Ranges(Ranges) { + : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), + AAInfo(AAInfo), Ranges(Ranges) { assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() || isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) && "invalid pointer value"); @@ -510,7 +516,8 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(getOffset()); ID.AddInteger(Size); ID.AddPointer(getOpaqueValue()); - ID.AddInteger(Flags); + ID.AddInteger(getFlags()); + ID.AddInteger(getBaseAlignment()); } void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { @@ -521,8 +528,7 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { if (MMO->getBaseAlignment() >= getBaseAlignment()) { // Update the alignment value. 
- Flags = (Flags & ((1 << MOMaxBits) - 1)) | - ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits); + BaseAlignLog2 = Log2_32(MMO->getBaseAlignment()) + 1; // Also update the base and offset, because the new alignment may // not be applicable with the old ones. PtrInfo = MMO->PtrInfo; @@ -647,7 +653,12 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, DebugLoc dl, bool NoImp) : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), - debugLoc(std::move(dl)) { + debugLoc(std::move(dl)) +#ifdef LLVM_BUILD_GLOBAL_ISEL + , + Ty(nullptr) +#endif +{ assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. @@ -664,10 +675,14 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0), - Flags(0), AsmPrinterFlags(0), - NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), - debugLoc(MI.getDebugLoc()) { + : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0), + Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), + MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) +#ifdef LLVM_BUILD_GLOBAL_ISEL + , + Ty(nullptr) +#endif +{ assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); CapOperands = OperandCapacity::get(MI.getNumOperands()); @@ -690,6 +705,25 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { return nullptr; } +// Implement dummy setter and getter for type when +// global-isel is not built. +// The proper implementation is WIP and is tracked here: +// PR26576. +#ifndef LLVM_BUILD_GLOBAL_ISEL +void MachineInstr::setType(Type *Ty) {} + +Type *MachineInstr::getType() const { return nullptr; } + +#else +void MachineInstr::setType(Type *Ty) { + assert((!Ty || isPreISelGenericOpcode(getOpcode())) && + "Non generic instructions are not supposed to be typed"); + this->Ty = Ty; +} + +Type *MachineInstr::getType() const { return Ty; } +#endif // LLVM_BUILD_GLOBAL_ISEL + /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands already be on their use lists. @@ -867,7 +901,7 @@ void MachineInstr::addMemOperand(MachineFunction &MF, } /// Check to see if the MMOs pointed to by the two MemRefs arrays are -/// identical. +/// identical. static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) { auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end(); auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end(); @@ -894,7 +928,7 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) { // cases in practice. if (hasIdenticalMMOs(*this, Other)) return std::make_pair(MemRefs, NumMemRefs); - + // TODO: consider uniquing elements within the operand lists to reduce // space usage and fall back to conservative information less often. 
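The MachineMemOperand change above stops packing the alignment into Flags and instead keeps log2(alignment) + 1 in its own narrow field. An encode/decode sketch of that scheme; the comment about the sentinel is an inference from the old packed encoding, not stated in the diff:

#include <cassert>
#include <cstdint>

static unsigned encodeBaseAlign(uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 &&
         "alignment must be a power of two");
  unsigned Log2 = 0;
  while (Align >>= 1)
    ++Log2;          // equivalent of Log2_32 for powers of two
  return Log2 + 1;   // the +1 leaves 0 available as a "no alignment" sentinel
}

static uint64_t decodeBaseAlign(unsigned BaseAlignLog2) {
  return UINT64_C(1) << (BaseAlignLog2 - 1);
}

Round trip: an alignment of 16 encodes as 5 and decodes back to 1 << 4 = 16.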
   size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs;
@@ -913,7 +947,7 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
                              MemEnd);
   assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs &&
          "missing memrefs");
- 
+
   return std::make_pair(MemBegin, CombinedNumMemRefs);
 }
 
@@ -933,23 +967,23 @@ bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
   }
 }
 
-bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
+bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
                                  MICheckType Check) const {
   // If opcodes or number of operands are not the same then the two
   // instructions are obviously not identical.
-  if (Other->getOpcode() != getOpcode() ||
-      Other->getNumOperands() != getNumOperands())
+  if (Other.getOpcode() != getOpcode() ||
+      Other.getNumOperands() != getNumOperands())
     return false;
 
   if (isBundle()) {
     // Both instructions are bundles, compare MIs inside the bundle.
     MachineBasicBlock::const_instr_iterator I1 = getIterator();
     MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
-    MachineBasicBlock::const_instr_iterator I2 = Other->getIterator();
-    MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end();
+    MachineBasicBlock::const_instr_iterator I2 = Other.getIterator();
+    MachineBasicBlock::const_instr_iterator E2 = Other.getParent()->instr_end();
     while (++I1 != E1 && I1->isInsideBundle()) {
       ++I2;
-      if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(&*I2, Check))
+      if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(*I2, Check))
         return false;
     }
   }
@@ -957,7 +991,7 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
   // Check operands to make sure they match.
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = getOperand(i);
-    const MachineOperand &OMO = Other->getOperand(i);
+    const MachineOperand &OMO = Other.getOperand(i);
     if (!MO.isReg()) {
       if (!MO.isIdenticalTo(OMO))
         return false;
@@ -990,8 +1024,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
   }
   // If DebugLoc does not match then two dbg.values are not identical.
   if (isDebugValue())
-    if (getDebugLoc() && Other->getDebugLoc() &&
-        getDebugLoc() != Other->getDebugLoc())
+    if (getDebugLoc() && Other.getDebugLoc() &&
+        getDebugLoc() != Other.getDebugLoc())
       return false;
   return true;
 }
@@ -1130,6 +1164,16 @@ int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx,
   return -1;
 }
 
+const DILocalVariable *MachineInstr::getDebugVariable() const {
+  assert(isDebugValue() && "not a DBG_VALUE");
+  return cast<DILocalVariable>(getOperand(2).getMetadata());
+}
+
+const DIExpression *MachineInstr::getDebugExpression() const {
+  assert(isDebugValue() && "not a DBG_VALUE");
+  return cast<DIExpression>(getOperand(3).getMetadata());
+}
+
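The two accessors added here depend on the fixed operand layout of DBG_VALUE:
operand 0 is the location, operand 1 the offset (or a second register), operand
2 the DILocalVariable, and operand 3 the DIExpression. A hedged sketch of a
consumer (PrintVarName is an illustrative helper, not part of the patch):

    void PrintVarName(const MachineInstr &MI, raw_ostream &OS) {
      if (!MI.isDebugValue())
        return;                       // The accessors assert() otherwise.
      const DILocalVariable *Var = MI.getDebugVariable();   // operand 2
      const DIExpression *Expr = MI.getDebugExpression();   // operand 3
      OS << Var->getName();
      (void)Expr;  // e.g. check Expr->isValid() before emitting locations
    }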
 const TargetRegisterClass*
 MachineInstr::getRegClassConstraint(unsigned OpIdx,
                                     const TargetInstrInfo *TII,
@@ -1157,7 +1201,10 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
 
     unsigned Flag = getOperand(FlagIdx).getImm();
     unsigned RCID;
-    if (InlineAsm::hasRegClassConstraint(Flag, RCID))
+    if ((InlineAsm::getKind(Flag) == InlineAsm::Kind_RegUse ||
+         InlineAsm::getKind(Flag) == InlineAsm::Kind_RegDef ||
+         InlineAsm::getKind(Flag) == InlineAsm::Kind_RegDefEarlyClobber) &&
+        InlineAsm::hasRegClassConstraint(Flag, RCID))
       return TRI->getRegClass(RCID);
 
     // Assume that all registers in a memory operand are pointers.
@@ -1173,7 +1220,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
   // Check every operands inside the bundle if we have
   // been asked to.
   if (ExploreBundle)
-    for (ConstMIBundleOperands OpndIt(this); OpndIt.isValid() && CurRC;
+    for (ConstMIBundleOperands OpndIt(*this); OpndIt.isValid() && CurRC;
          ++OpndIt)
       CurRC = OpndIt->getParent()->getRegClassConstraintEffectForVRegImpl(
           OpndIt.getOperandNo(), Reg, CurRC, TII, TRI);
@@ -1219,11 +1266,24 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect(
 unsigned MachineInstr::getBundleSize() const {
   MachineBasicBlock::const_instr_iterator I = getIterator();
   unsigned Size = 0;
-  while (I->isBundledWithSucc())
-    ++Size, ++I;
+  while (I->isBundledWithSucc()) {
+    ++Size;
+    ++I;
+  }
   return Size;
 }
 
+/// Returns true if the MachineInstr has an implicit-use operand of exactly
+/// the given register (not considering sub/super-registers).
+bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg)
+      return true;
+  }
+  return false;
+}
+
 /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
 /// the specific register or -1 if it is not found. It further tightens
 /// the search criteria to a use that kills the register if isKill is true.
@@ -1498,12 +1558,10 @@ bool MachineInstr::hasOrderedMemoryRef() const {
   if (memoperands_empty())
     return true;
 
-  // Check the memory reference information for ordered references.
-  for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
-    if (!(*I)->isUnordered())
-      return true;
-
-  return false;
+  // Check if any of our memory operands are ordered.
+  return any_of(memoperands(), [](const MachineMemOperand *MMO) {
+    return !MMO->isUnordered();
+  });
 }
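The hasOrderedMemoryRef rewrite above is one instance of a pattern this commit
applies repeatedly: explicit mmo_iterator loops become range-based queries over
memoperands(). The same idiom, sketched for a different predicate
(HasVolatileRef is illustrative, not part of the patch):

    static bool HasVolatileRef(const MachineInstr &MI) {
      // any_of stops at the first memory operand matching the predicate.
      return any_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
        return MMO->isVolatile();
      });
    }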
 
 /// isInvariantLoad - Return true if this instruction is loading from a
@@ -1523,23 +1581,21 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
 
   const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
 
-  for (mmo_iterator I = memoperands_begin(),
-       E = memoperands_end(); I != E; ++I) {
-    if ((*I)->isVolatile()) return false;
-    if ((*I)->isStore()) return false;
-    if ((*I)->isInvariant()) return true;
-
+  for (MachineMemOperand *MMO : memoperands()) {
+    if (MMO->isVolatile()) return false;
+    if (MMO->isStore()) return false;
+    if (MMO->isInvariant()) continue;
 
     // A load from a constant PseudoSourceValue is invariant.
-    if (const PseudoSourceValue *PSV = (*I)->getPseudoValue())
+    if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
       if (PSV->isConstant(MFI))
         continue;
 
-    if (const Value *V = (*I)->getValue()) {
+    if (const Value *V = MMO->getValue()) {
       // If we have an AliasAnalysis, ask it whether the memory is constant.
       if (AA &&
           AA->pointsToConstantMemory(
-              MemoryLocation(V, (*I)->getSize(), (*I)->getAAInfo())))
+              MemoryLocation(V, MMO->getSize(), MMO->getAAInfo())))
         continue;
     }
 
@@ -1598,16 +1654,16 @@ bool MachineInstr::allDefsAreDead() const {
 
 /// copyImplicitOps - Copy implicit register operands from specified
 /// instruction to this instruction.
 void MachineInstr::copyImplicitOps(MachineFunction &MF,
-                                   const MachineInstr *MI) {
-  for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+                                   const MachineInstr &MI) {
+  for (unsigned i = MI.getDesc().getNumOperands(), e = MI.getNumOperands();
        i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
+    const MachineOperand &MO = MI.getOperand(i);
     if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
       addOperand(MF, MO);
   }
 }
 
-void MachineInstr::dump() const {
+LLVM_DUMP_METHOD void MachineInstr::dump() const {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   dbgs() << "  " << *this;
 #endif
@@ -1651,8 +1707,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
     if (StartOp != 0) OS << ", ";
     getOperand(StartOp).print(OS, MST, TRI);
     unsigned Reg = getOperand(StartOp).getReg();
-    if (TargetRegisterInfo::isVirtualRegister(Reg))
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
       VirtRegs.push_back(Reg);
+      unsigned Size;
+      if (MRI && (Size = MRI->getSize(Reg)))
+        OS << '(' << Size << ')';
+    }
   }
 
   if (StartOp != 0)
@@ -1664,6 +1724,12 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
   else
     OS << "UNKNOWN";
 
+  if (getType()) {
+    OS << ' ';
+    getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
+    OS << ' ';
+  }
+
   if (SkipOpers)
     return;
 
@@ -1686,6 +1752,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
       OS << " [mayload]";
     if (ExtraInfo & InlineAsm::Extra_MayStore)
       OS << " [maystore]";
+    if (ExtraInfo & InlineAsm::Extra_IsConvergent)
+      OS << " [isconvergent]";
     if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
       OS << " [alignstack]";
     if (getInlineAsmDialect() == InlineAsm::AD_ATT)
@@ -1761,13 +1829,41 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
       }
 
       unsigned RCID = 0;
-      if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
+      if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) &&
+          InlineAsm::hasRegClassConstraint(Flag, RCID)) {
        if (TRI) {
          OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
        } else
          OS << ":RC" << RCID;
      }
 
+      if (InlineAsm::isMemKind(Flag)) {
+        unsigned MCID = InlineAsm::getMemoryConstraintID(Flag);
+        switch (MCID) {
+        case InlineAsm::Constraint_es: OS << ":es"; break;
+        case InlineAsm::Constraint_i:  OS << ":i"; break;
+        case InlineAsm::Constraint_m:  OS << ":m"; break;
+        case InlineAsm::Constraint_o:  OS << ":o"; break;
+        case InlineAsm::Constraint_v:  OS << ":v"; break;
+        case InlineAsm::Constraint_Q:  OS << ":Q"; break;
+        case InlineAsm::Constraint_R:  OS << ":R"; break;
+        case InlineAsm::Constraint_S:  OS << ":S"; break;
+        case InlineAsm::Constraint_T:  OS << ":T"; break;
+        case InlineAsm::Constraint_Um: OS << ":Um"; break;
+        case InlineAsm::Constraint_Un: OS << ":Un"; break;
+        case InlineAsm::Constraint_Uq: OS << ":Uq"; break;
+        case InlineAsm::Constraint_Us: OS << ":Us"; break;
+        case InlineAsm::Constraint_Ut: OS << ":Ut"; break;
+        case InlineAsm::Constraint_Uv: OS << ":Uv"; break;
+        case InlineAsm::Constraint_Uy: OS << ":Uy"; break;
+        case InlineAsm::Constraint_X:  OS << ":X"; break;
+        case InlineAsm::Constraint_Z:  OS << ":Z"; break;
+        case InlineAsm::Constraint_ZC: OS << ":ZC"; break;
+        case InlineAsm::Constraint_Zy: OS << ":Zy"; break;
+        default: OS << ":?"; break;
+        }
+      }
+
       unsigned TiedTo = 0;
       if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
         OS << " tiedto:$" << TiedTo;
@@ -1824,11 +1920,18 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
       HaveSemi = true;
     }
     for (unsigned i = 0; i != VirtRegs.size(); ++i) {
-      const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
-      OS << " " << TRI->getRegClassName(RC)
-         << ':' << PrintReg(VirtRegs[i]);
+      const RegClassOrRegBank &RC = MRI->getRegClassOrRegBank(VirtRegs[i]);
+      if (!RC)
+        continue;
+      // Generic virtual registers do not have register classes.
+      if (RC.is<const RegisterBank *>())
+        OS << " " << RC.get<const RegisterBank *>()->getName();
+      else
+        OS << " "
+           << TRI->getRegClassName(RC.get<const TargetRegisterClass *>());
+      OS << ':' << PrintReg(VirtRegs[i]);
       for (unsigned j = i+1; j != VirtRegs.size();) {
-        if (MRI->getRegClass(VirtRegs[j]) != RC) {
+        if (MRI->getRegClassOrRegBank(VirtRegs[j]) != RC) {
           ++j;
           continue;
         }
@@ -1877,6 +1980,13 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
     MachineOperand &MO = getOperand(i);
     if (!MO.isReg() || !MO.isUse() || MO.isUndef())
       continue;
+
+    // DEBUG_VALUE nodes do not contribute to code generation and should
+    // always be ignored. Failure to do so may result in trying to modify
+    // KILL flags on DEBUG_VALUE nodes.
+    if (MO.isDebug())
+      continue;
+
     unsigned Reg = MO.getReg();
     if (!Reg)
       continue;
@@ -1932,7 +2042,7 @@ void MachineInstr::clearRegisterKills(unsigned Reg,
     if (!MO.isReg() || !MO.isUse() || !MO.isKill())
       continue;
     unsigned OpReg = MO.getReg();
-    if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg)))
+    if ((RegInfo && RegInfo->regsOverlap(Reg, OpReg)) || Reg == OpReg)
       MO.setIsKill(false);
   }
 }
@@ -2085,3 +2195,42 @@ void MachineInstr::emitError(StringRef Msg) const {
     return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
   report_fatal_error(Msg);
 }
+
+MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
+                                  const MCInstrDesc &MCID, bool IsIndirect,
+                                  unsigned Reg, unsigned Offset,
+                                  const MDNode *Variable, const MDNode *Expr) {
+  assert(isa<DILocalVariable>(Variable) && "not a variable");
+  assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+  assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+         "Expected inlined-at fields to agree");
+  if (IsIndirect)
+    return BuildMI(MF, DL, MCID)
+        .addReg(Reg, RegState::Debug)
+        .addImm(Offset)
+        .addMetadata(Variable)
+        .addMetadata(Expr);
+  else {
+    assert(Offset == 0 && "A direct address cannot have an offset.");
+    return BuildMI(MF, DL, MCID)
+        .addReg(Reg, RegState::Debug)
+        .addReg(0U, RegState::Debug)
+        .addMetadata(Variable)
+        .addMetadata(Expr);
+  }
+}
+
+MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
+                                  MachineBasicBlock::iterator I,
+                                  const DebugLoc &DL, const MCInstrDesc &MCID,
+                                  bool IsIndirect, unsigned Reg,
+                                  unsigned Offset, const MDNode *Variable,
+                                  const MDNode *Expr) {
+  assert(isa<DILocalVariable>(Variable) && "not a variable");
+  assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+  MachineFunction &MF = *BB.getParent();
+  MachineInstr *MI =
+      BuildMI(MF, DL, MCID, IsIndirect, Reg, Offset, Variable, Expr);
+  BB.insert(I, MI);
+  return MachineInstrBuilder(MF, MI);
+}
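The two BuildMI overloads added above centralize DBG_VALUE construction: the
indirect form is <reg, offset, var, expr>, the direct form <reg, reg0, var,
expr> with a required zero offset. A hypothetical call site might look like
this (MBB, InsertPt, DL, VReg, Variable, and Expr come from surrounding code):

    const MCInstrDesc &Desc = TII->get(TargetOpcode::DBG_VALUE);
    MachineInstrBuilder MIB =
        BuildMI(*MBB, InsertPt, DL, Desc, /*IsIndirect=*/false, VReg,
                /*Offset=*/0, Variable, Expr);  // Offset must be 0 when direct

The overload taking a block and iterator simply builds through the
function-level overload and inserts the result, as the code above shows.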
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 4619daf..e4686b3 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <utility>
 using namespace llvm;
 
 namespace {
@@ -24,7 +25,7 @@ namespace {
   public:
     static char ID; // Pass identification
     UnpackMachineBundles(std::function<bool(const Function &)> Ftor = nullptr)
-      : MachineFunctionPass(ID), PredicateFtor(Ftor) {
+      : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
       initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
     }
 
@@ -78,7 +79,7 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
 
 FunctionPass *
 llvm::createUnpackMachineBundles(std::function<bool(const Function &)> Ftor) {
-  return new UnpackMachineBundles(Ftor);
+  return new UnpackMachineBundles(std::move(Ftor));
 }
 
 namespace {
@@ -293,7 +294,7 @@ MachineOperandIteratorBase::PhysRegInfo
 MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
                                            const TargetRegisterInfo *TRI) {
   bool AllDefsDead = true;
-  PhysRegInfo PRI = {false, false, false, false, false, false, false};
+  PhysRegInfo PRI = {false, false, false, false, false, false, false, false};
 
   assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
          "analyzePhysReg not given a physical register!");
@@ -332,8 +333,12 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
     }
   }
 
-  if (AllDefsDead && PRI.FullyDefined)
-    PRI.DeadDef = true;
+  if (AllDefsDead) {
+    if (PRI.FullyDefined || PRI.Clobbered)
+      PRI.DeadDef = true;
+    else if (PRI.Defined)
+      PRI.PartialDeadDef = true;
+  }
 
   return PRI;
 }
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index 99a97d2..119751b 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -260,7 +260,7 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
 }
 
 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
-  if (skipOptnoneFunction(*MF.getFunction()))
+  if (skipFunction(*MF.getFunction()))
     return false;
 
   Changed = FirstInLoop = false;
@@ -428,7 +428,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
   if (Def && !RuledOut) {
     int FI = INT_MIN;
     if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
-        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+        (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
      Candidates.push_back(CandidateInfo(MI, Def, FI));
   }
 }
@@ -581,14 +581,14 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
 }
 
 void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
-  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+  DEBUG(dbgs() << "Entering BB#" << MBB->getNumber() << '\n');
 
   // Remember livein register pressure.
   BackTrace.push_back(RegPressure);
 }
 
 void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
-  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+  DEBUG(dbgs() << "Exiting BB#" << MBB->getNumber() << '\n');
   BackTrace.pop_back();
 }
 
@@ -764,7 +764,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
   if (BB->pred_size() == 1) {
     MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
     SmallVector<MachineOperand, 4> Cond;
-    if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+    if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
       InitRegPressure(*BB->pred_begin());
   }
 
@@ -982,7 +982,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
       if (MOReg != Reg)
        continue;
 
-      if (TII->hasHighOperandLatency(SchedModel, MRI, &MI, DefIdx, &UseMI, i))
+      if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
        return true;
     }
 
@@ -996,7 +996,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
 
 /// Return true if the instruction is marked "cheap" or the operand latency
 /// between its def and a use is one or less.
 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
-  if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike())
+  if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
     return true;
 
   bool isCheap = false;
@@ -1010,7 +1010,7 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
     if (TargetRegisterInfo::isPhysicalRegister(Reg))
       continue;
 
-    if (!TII->hasLowDefLatency(SchedModel, &MI, i))
+    if (!TII->hasLowDefLatency(SchedModel, MI, i))
       return false;
     isCheap = true;
   }
@@ -1086,7 +1086,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
 
   // Rematerializable instructions should always be hoisted since the register
   // allocator can just pull them down again when needed.
-  if (TII->isTriviallyReMaterializable(&MI, AA))
+  if (TII->isTriviallyReMaterializable(MI, AA))
     return true;
 
   // FIXME: If there are long latency loop-invariant instructions inside the
@@ -1139,8 +1139,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
 
   // High register pressure situation, only hoist if the instruction is going
   // to be remat'ed.
-  if (!TII->isTriviallyReMaterializable(&MI, AA) &&
-      !MI.isInvariantLoad(AA)) {
+  if (!TII->isTriviallyReMaterializable(MI, AA) && !MI.isInvariantLoad(AA)) {
     DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
     return false;
   }
@@ -1171,17 +1170,15 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
                                     &LoadRegIndex);
   if (NewOpc == 0) return nullptr;
   const MCInstrDesc &MID = TII->get(NewOpc);
-  if (MID.getNumDefs() != 1) return nullptr;
   MachineFunction &MF = *MI->getParent()->getParent();
   const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
   // Ok, we're unfolding. Create a temporary register and do the unfold.
   unsigned Reg = MRI->createVirtualRegister(RC);
 
   SmallVector<MachineInstr *, 2> NewMIs;
-  bool Success =
-    TII->unfoldMemoryOperand(MF, MI, Reg,
-                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
-                             NewMIs);
+  bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
+                                          /*UnfoldLoad=*/true,
+                                          /*UnfoldStore=*/false, NewMIs);
   (void)Success;
   assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
@@ -1222,7 +1219,7 @@ const MachineInstr*
 MachineLICM::LookForDuplicate(const MachineInstr *MI,
                               std::vector<const MachineInstr*> &PrevMIs) {
   for (const MachineInstr *PrevMI : PrevMIs)
-    if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
+    if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
      return PrevMI;
 
   return nullptr;
 }
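ExtractHoistableLoad above drives TII->unfoldMemoryOperand: it asks the target
for the post-unfold opcode, creates a fresh virtual register of the load's
class, and splits the instruction into a hoistable load plus a remainder. In
outline (hedged; the error paths and bookkeeping of the real function elided):

    // NewOpc and LoadRegIndex come from getOpcodeAfterMemoryUnfold as above.
    unsigned Reg = MRI->createVirtualRegister(RC);  // RC: class at LoadRegIndex
    SmallVector<MachineInstr *, 2> NewMIs;          // [0] = load, [1] = rest
    if (!TII->unfoldMemoryOperand(MF, *MI, Reg, /*UnfoldLoad=*/true,
                                  /*UnfoldStore=*/false, NewMIs))
      return nullptr;                               // target refused to unfold
    // NewMIs[0] is now a plain load that the hoisting logic can move.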
DEBUG({ dbgs() << "Hoisting " << *MI; - if (Preheader->getBasicBlock()) - dbgs() << " to MachineBasicBlock " - << Preheader->getName(); if (MI->getParent()->getBasicBlock()) - dbgs() << " from MachineBasicBlock " - << MI->getParent()->getName(); + dbgs() << " from BB#" << MI->getParent()->getNumber(); + if (Preheader->getBasicBlock()) + dbgs() << " to BB#" << Preheader->getNumber(); dbgs() << "\n"; }); @@ -1382,7 +1377,7 @@ MachineBasicBlock *MachineLICM::getCurPreheader() { return nullptr; } - CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); + CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this); if (!CurPreheader) { CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); return nullptr; diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp index 2f5c9e0..376f78f 100644 --- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -50,11 +50,12 @@ void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { MachineBasicBlock *MachineLoop::getTopBlock() { MachineBasicBlock *TopMBB = getHeader(); MachineFunction::iterator Begin = TopMBB->getParent()->begin(); - if (TopMBB != Begin) { + if (TopMBB->getIterator() != Begin) { MachineBasicBlock *PriorMBB = &*std::prev(TopMBB->getIterator()); while (contains(PriorMBB)) { TopMBB = PriorMBB; - if (TopMBB == Begin) break; + if (TopMBB->getIterator() == Begin) + break; PriorMBB = &*std::prev(TopMBB->getIterator()); } } @@ -64,7 +65,7 @@ MachineBasicBlock *MachineLoop::getTopBlock() { MachineBasicBlock *MachineLoop::getBottomBlock() { MachineBasicBlock *BotMBB = getHeader(); MachineFunction::iterator End = BotMBB->getParent()->end(); - if (BotMBB != std::prev(End)) { + if (BotMBB->getIterator() != std::prev(End)) { MachineBasicBlock *NextMBB = &*std::next(BotMBB->getIterator()); while (contains(NextMBB)) { BotMBB = NextMBB; @@ -77,7 +78,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void MachineLoop::dump() const { +LLVM_DUMP_METHOD void MachineLoop::dump() const { print(dbgs()); } #endif diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 1956a70..244e3fb 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -396,7 +396,8 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); - --j, --e; + --j; + --e; } // Remove landing pads with no try-ranges. 
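getTopBlock/getBottomBlock now compare explicit iterators instead of relying on
an implicit MachineBasicBlock*-to-iterator conversion. The underlying walk is
simple: starting at the header, step to the previous (or next) block while it
is still inside the loop. A sketch of the upward walk under the assumption of a
contiguous loop layout:

    MachineBasicBlock *Top = Loop->getHeader();
    MachineFunction::iterator Begin = Top->getParent()->begin();
    while (Top->getIterator() != Begin &&
           Loop->contains(&*std::prev(Top->getIterator())))
      Top = &*std::prev(Top->getIterator());  // keep climbing while in-loop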
diff --git a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
index 01d2c2e..fc32183 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp
@@ -104,8 +104,8 @@ void MachineRegionInfoPass::verifyAnalysis() const {
 void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequiredTransitive<DominatorTreeWrapperPass>();
-  AU.addRequired<PostDominatorTree>();
-  AU.addRequired<DominanceFrontier>();
+  AU.addRequired<PostDominatorTreeWrapperPass>();
+  AU.addRequired<DominanceFrontierWrapperPass>();
 }
 
 void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
@@ -113,7 +113,7 @@ void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MachineRegionInfoPass::dump() const {
+LLVM_DUMP_METHOD void MachineRegionInfoPass::dump() const {
   RI.dump();
 }
 #endif
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 03c82f4..613598d 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -24,9 +24,8 @@ using namespace llvm;
 // Pin the vtable to this file.
 void MachineRegisterInfo::Delegate::anchor() {}
 
-MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
-  : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true),
-    TracksSubRegLiveness(false) {
+MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
+    : MF(MF), TheDelegate(nullptr), TracksSubRegLiveness(false) {
   unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
   VRegInfo.reserve(256);
   RegAllocHints.reserve(256);
@@ -42,6 +41,11 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
   VRegInfo[Reg].first = RC;
 }
 
+void MachineRegisterInfo::setRegBank(unsigned Reg,
+                                     const RegisterBank &RegBank) {
+  VRegInfo[Reg].first = &RegBank;
+}
+
 const TargetRegisterClass *
 MachineRegisterInfo::constrainRegClass(unsigned Reg,
                                        const TargetRegisterClass *RC,
@@ -103,6 +107,32 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
   return Reg;
 }
 
+unsigned
+MachineRegisterInfo::getSize(unsigned VReg) const {
+  VRegToSizeMap::const_iterator SizeIt = getVRegToSize().find(VReg);
+  return SizeIt != getVRegToSize().end() ? SizeIt->second : 0;
+}
+
+void MachineRegisterInfo::setSize(unsigned VReg, unsigned Size) {
+  getVRegToSize()[VReg] = Size;
+}
+
+unsigned
+MachineRegisterInfo::createGenericVirtualRegister(unsigned Size) {
+  assert(Size && "Cannot create empty virtual register");
+
+  // New virtual register number.
+  unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+  VRegInfo.grow(Reg);
+  // FIXME: Should we use a dummy register class?
+  VRegInfo[Reg].first = static_cast<TargetRegisterClass *>(nullptr);
+  getVRegToSize()[Reg] = Size;
+  RegAllocHints.grow(Reg);
+  if (TheDelegate)
+    TheDelegate->MRI_NoteNewVirtualRegister(Reg);
+  return Reg;
+}
+
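createGenericVirtualRegister is the GlobalISel entry point added above: a
generic vreg carries a size in bits instead of a register class, with
getSize/setSize maintaining that side table. A hedged usage sketch (GPRBank is
an illustrative RegisterBank, not defined by this patch):

    unsigned GVReg = MRI.createGenericVirtualRegister(/*Size=*/32);
    assert(MRI.getSize(GVReg) == 32 && "size recorded in the side table");
    // Later, register-bank selection can attach a bank via setRegBank():
    // MRI.setRegBank(GVReg, GPRBank);

This is why the MachineInstr printer earlier in the diff emits "(32)" after a
virtual register that has a size but no class.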
 /// clearVirtRegs - Remove all virtual registers (after physreg assignment).
 void MachineRegisterInfo::clearVirtRegs() {
 #ifndef NDEBUG
@@ -471,13 +501,14 @@ static bool isNoReturnDef(const MachineOperand &MO) {
           !Called->hasFnAttribute(Attribute::NoUnwind));
 }
 
-bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const {
+bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg,
+                                            bool SkipNoReturnDef) const {
   if (UsedPhysRegMask.test(PhysReg))
     return true;
   const TargetRegisterInfo *TRI = getTargetRegisterInfo();
   for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) {
     for (const MachineOperand &MO : make_range(def_begin(*AI), def_end())) {
-      if (isNoReturnDef(MO))
+      if (!SkipNoReturnDef && isNoReturnDef(MO))
        continue;
       return true;
     }
   }
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 71a6eba..47ad60c 100644
--- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -19,7 +19,6 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/AlignOf.h"
-#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index bcee15c..d921e29 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -23,13 +23,13 @@
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/ScheduleDFS.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include <queue>
 
 using namespace llvm;
 
@@ -65,14 +65,20 @@ static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
 static bool ViewMISchedDAGs = false;
 #endif // NDEBUG
 
+/// Avoid quadratic complexity in unusually large basic blocks by limiting the
+/// size of the ready lists.
+static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
+  cl::desc("Limit ready list to N instructions"), cl::init(256));
+
 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
   cl::desc("Enable register pressure scheduling."), cl::init(true));
 
 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
 
-static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
-  cl::desc("Enable load clustering."), cl::init(true));
+static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
+                                        cl::desc("Enable memop clustering."),
+                                        cl::init(true));
 
 // Experimental heuristics
 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
@@ -219,6 +225,11 @@ static cl::opt<bool> EnableMachineSched(
     cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
     cl::Hidden);
 
+static cl::opt<bool> EnablePostRAMachineSched(
+    "enable-post-misched",
+    cl::desc("Enable the post-ra machine instruction scheduling pass."),
+    cl::init(true), cl::Hidden);
+
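EnablePostRAMachineSched follows the same gating pattern already used for
-enable-misched: getNumOccurrences() distinguishes "flag given on the command
line" from "default value", so an explicit flag beats the subtarget hook. The
shape of that idiom, as a standalone sketch (MyPassEnable and ShouldRun are
illustrative names):

    static cl::opt<bool> MyPassEnable(
        "enable-my-pass", cl::Hidden, cl::init(true),
        cl::desc("Enable the pass regardless of the subtarget default."));

    bool ShouldRun(const TargetSubtargetInfo &ST) {
      if (MyPassEnable.getNumOccurrences())  // explicitly on command line?
        return MyPassEnable;                 // the flag wins
      return ST.enablePostRAScheduler();     // otherwise, target default
    }

PostMachineScheduler::runOnMachineFunction below applies exactly this check.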
 /// Forward declare the standard machine scheduler. This will be used as the
 /// default scheduler if the target does not set a default.
 static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
@@ -314,6 +325,9 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
 /// design would be to split blocks at scheduling boundaries, but LLVM has a
 /// general bias against block splitting purely for implementation simplicity.
 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+  if (skipFunction(*mf.getFunction()))
+    return false;
+
   if (EnableMachineSched.getNumOccurrences()) {
     if (!EnableMachineSched)
       return false;
@@ -349,10 +363,13 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
 }
 
 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
-  if (skipOptnoneFunction(*mf.getFunction()))
+  if (skipFunction(*mf.getFunction()))
     return false;
 
-  if (!mf.getSubtarget().enablePostRAScheduler()) {
+  if (EnablePostRAMachineSched.getNumOccurrences()) {
+    if (!EnablePostRAMachineSched)
+      return false;
+  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
     DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
     return false;
   }
@@ -389,7 +406,7 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                             MachineBasicBlock *MBB,
                             MachineFunction *MF,
                             const TargetInstrInfo *TII) {
-  return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF);
+  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
 }
 
 /// Main driver for both MachineScheduler and PostMachineScheduler.
@@ -427,7 +444,6 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
     //
     // MBB::size() uses instr_iterator to count. Here we need a bundle to count
     // as a single instruction.
-    unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end());
 
     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
         RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) {
@@ -435,15 +451,13 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
       if (RegionEnd != MBB->end() ||
          isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
        --RegionEnd;
-        // Count the boundary instruction.
-        --RemainingInstrs;
       }
 
       // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      unsigned NumRegionInstrs = 0;
      MachineBasicBlock::iterator I = RegionEnd;
-      for(;I != MBB->begin(); --I, --RemainingInstrs) {
+      for (;I != MBB->begin(); --I) {
        if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
          break;
        if (!I->isDebugValue())
@@ -466,8 +480,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
             << "\n  From: " << *I << "    To: ";
             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
             else dbgs() << "End";
-            dbgs() << " RegionInstrs: " << NumRegionInstrs
-            << " Remaining: " << RemainingInstrs << "\n");
+            dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
       if (DumpCriticalPathLength) {
        errs() << MF->getName();
        errs() << ":BB# " << MBB->getNumber();
@@ -485,7 +498,6 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
       // scheduler for the top of it's scheduled region.
       RegionEnd = Scheduler.begin();
     }
-    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
     Scheduler.finishBlock();
     // FIXME: Ideally, no further passes should rely on kill flags. However,
     // thumb2 size reduction is currently an exception, so the PostMIScheduler
@@ -640,7 +652,7 @@ void ScheduleDAGMI::moveInstruction(
 
   // Update LiveIntervals
   if (LIS)
-    LIS->handleMove(MI, /*UpdateFlags=*/true);
+    LIS->handleMove(*MI, /*UpdateFlags=*/true);
 
   // Recede RegionBegin if an instruction moves above the first.
   if (RegionBegin == InsertPos)
@@ -704,8 +716,7 @@ void ScheduleDAGMI::schedule() {
         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
       else
         moveInstruction(MI, CurrentTop);
-    }
-    else {
+    } else {
       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
       MachineBasicBlock::iterator priorII =
         priorNonDebug(CurrentBottom, CurrentTop);
@@ -869,13 +880,19 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
   SUPressureDiffs.clear();
 
   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
+  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
+
+  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
+         "ShouldTrackLaneMasks requires ShouldTrackPressure");
 }
 
 // Setup the register pressure trackers for the top scheduled top and bottom
 // scheduled regions.
 void ScheduleDAGMILive::initRegPressure() {
-  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
-  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
+  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
+                    ShouldTrackLaneMasks, false);
+  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+                    ShouldTrackLaneMasks, false);
 
   // Close the RPTracker to finalize live ins.
   RPTracker.closeRegion();
@@ -905,7 +922,7 @@ void ScheduleDAGMILive::initRegPressure() {
 
   // Account for liveness generated by the region boundary.
   if (LiveRegionEnd != RegionEnd) {
-    SmallVector<unsigned, 8> LiveUses;
+    SmallVector<RegisterMaskPair, 8> LiveUses;
     BotRPTracker.recede(&LiveUses);
     updatePressureDiffs(LiveUses);
   }
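The switch from ArrayRef<unsigned> to ArrayRef<RegisterMaskPair> threads
subregister lane masks through the pressure tracker: each live-use record now
names a register unit plus the lanes it touches. Conceptually, a consumer sees
something like this (field names match the real struct; the mask type is an
integer bitmask in this LLVM version, usage hedged):

    for (const RegisterMaskPair &P : LiveUses) {
      unsigned Reg = P.RegUnit;       // vreg or physical register unit
      unsigned Mask = P.LaneMask;     // which lanes of Reg are affected
      if (Mask != 0) {
        // Non-empty mask: the use touches at least one live lane of Reg.
      }
    }

updatePressureDiffs below branches on ShouldTrackLaneMasks to decide whether it
can use the mask directly or must fall back to the older value-number query.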
@@ -969,47 +986,74 @@ updateScheduledPressure(const SUnit *SU,
 
 /// Update the PressureDiff array for liveness after scheduling this
 /// instruction.
-void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) {
-  for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) {
+void ScheduleDAGMILive::updatePressureDiffs(
+    ArrayRef<RegisterMaskPair> LiveUses) {
+  for (const RegisterMaskPair &P : LiveUses) {
+    unsigned Reg = P.RegUnit;
     /// FIXME: Currently assuming single-use physregs.
-    unsigned Reg = LiveUses[LUIdx];
-    DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
     if (!TRI->isVirtualRegister(Reg))
       continue;
 
-    // This may be called before CurrentBottom has been initialized. However,
-    // BotRPTracker must have a valid position. We want the value live into the
-    // instruction or live out of the block, so ask for the previous
-    // instruction's live-out.
-    const LiveInterval &LI = LIS->getInterval(Reg);
-    VNInfo *VNI;
-    MachineBasicBlock::const_iterator I =
-      nextIfDebug(BotRPTracker.getPos(), BB->end());
-    if (I == BB->end())
-      VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
-    else {
-      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I));
-      VNI = LRQ.valueIn();
-    }
-    // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
-    assert(VNI && "No live value at use.");
-    for (const VReg2SUnit &V2SU
-         : make_range(VRegUses.find(Reg), VRegUses.end())) {
-      SUnit *SU = V2SU.SU;
-      // If this use comes before the reaching def, it cannot be a last use, so
-      // descrease its pressure change.
-      if (!SU->isScheduled && SU != &ExitSU) {
-        LiveQueryResult LRQ
-          = LI.Query(LIS->getInstructionIndex(SU->getInstr()));
-        if (LRQ.valueIn() == VNI) {
-          PressureDiff &PDiff = getPressureDiff(SU);
-          PDiff.addPressureChange(Reg, true, &MRI);
-          DEBUG(
-            dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
-                   << *SU->getInstr();
-            dbgs() << "              to ";
-            PDiff.dump(*TRI);
-          );
+    if (ShouldTrackLaneMasks) {
+      // If the register has just become live then other uses won't change
+      // this fact anymore => decrement pressure.
+      // If the register has just become dead then other uses make it come
+      // back to life => increment pressure.
+      bool Decrement = P.LaneMask != 0;
+
+      for (const VReg2SUnit &V2SU
+           : make_range(VRegUses.find(Reg), VRegUses.end())) {
+        SUnit &SU = *V2SU.SU;
+        if (SU.isScheduled || &SU == &ExitSU)
+          continue;
+
+        PressureDiff &PDiff = getPressureDiff(&SU);
+        PDiff.addPressureChange(Reg, Decrement, &MRI);
+        DEBUG(
+          dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
+                 << PrintReg(Reg, TRI) << ':' << PrintLaneMask(P.LaneMask)
+                 << ' ' << *SU.getInstr();
+          dbgs() << "              to ";
+          PDiff.dump(*TRI);
+        );
+      }
+    } else {
+      assert(P.LaneMask != 0);
+      DEBUG(dbgs() << "  LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
+      // This may be called before CurrentBottom has been initialized. However,
+      // BotRPTracker must have a valid position. We want the value live into the
+      // instruction or live out of the block, so ask for the previous
+      // instruction's live-out.
+      const LiveInterval &LI = LIS->getInterval(Reg);
+      VNInfo *VNI;
+      MachineBasicBlock::const_iterator I =
+          nextIfDebug(BotRPTracker.getPos(), BB->end());
+      if (I == BB->end())
+        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
+      else {
+        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
+        VNI = LRQ.valueIn();
+      }
+      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
+      assert(VNI && "No live value at use.");
+      for (const VReg2SUnit &V2SU
+           : make_range(VRegUses.find(Reg), VRegUses.end())) {
+        SUnit *SU = V2SU.SU;
+        // If this use comes before the reaching def, it cannot be a last use,
+        // so decrease its pressure change.
+        if (!SU->isScheduled && SU != &ExitSU) {
+          LiveQueryResult LRQ =
+              LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
+          if (LRQ.valueIn() == VNI) {
+            PressureDiff &PDiff = getPressureDiff(SU);
+            PDiff.addPressureChange(Reg, true, &MRI);
+            DEBUG(
+              dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
+                     << *SU->getInstr();
+              dbgs() << "              to ";
+              PDiff.dump(*TRI);
+            );
+          }
+        }
+      }
+    }
   }
 }
@@ -1057,11 +1101,6 @@ void ScheduleDAGMILive::schedule() {
   // Initialize ready queues now that the DAG and priority data are finalized.
   initQueues(TopRoots, BotRoots);
 
-  if (ShouldTrackPressure) {
-    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
-    TopRPTracker.setPos(CurrentTop);
-  }
-
   bool IsTopNode = false;
   while (true) {
     DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
@@ -1111,14 +1150,14 @@ void ScheduleDAGMILive::buildDAGWithRegPressure() {
 
   // Initialize the register pressure tracker used by buildSchedGraph.
   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
-                 /*TrackUntiedDefs=*/true);
+                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
 
   // Account for liveness generate by the region boundary.
   if (LiveRegionEnd != RegionEnd)
     RPTracker.recede();
 
   // Build the DAG, and compute current register pressure.
-  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
+  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
 
   // Initialize top/bottom trackers after computing region pressure.
   initRegPressure();
@@ -1167,10 +1206,8 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
   unsigned MaxCyclicLatency = 0;
   // Visit each live out vreg def to find def/use pairs that cross iterations.
-  ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs;
-  for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end();
-       RI != RE; ++RI) {
-    unsigned Reg = *RI;
+  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
+    unsigned Reg = P.RegUnit;
     if (!TRI->isVirtualRegister(Reg))
        continue;
     const LiveInterval &LI = LIS->getInterval(Reg);
@@ -1193,8 +1230,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
        continue;
 
       // Only consider uses of the phi.
-      LiveQueryResult LRQ =
-        LI.Query(LIS->getInstructionIndex(SU->getInstr()));
+      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
       if (!LRQ.valueIn()->isPHIDef())
        continue;
 
@@ -1209,8 +1245,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
       if (LiveInHeight > LiveOutHeight) {
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
          CyclicLatency = LiveInHeight - LiveOutHeight;
-      }
-      else
+      } else
        CyclicLatency = 0;
 
       DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
@@ -1223,6 +1258,17 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
   return MaxCyclicLatency;
 }
 
+/// Release ExitSU predecessors and setup scheduler queues. Re-position
+/// the Top RP tracker in case the region beginning has changed.
+void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
+                                   ArrayRef<SUnit*> BotRoots) {
+  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
+  if (ShouldTrackPressure) {
+    assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+    TopRPTracker.setPos(CurrentTop);
+  }
+}
+
 /// Move an instruction and update register pressure.
 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
   // Move the instruction to its new location in the instruction stream.
@@ -1239,7 +1285,18 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
 
     if (ShouldTrackPressure) {
       // Update top scheduled pressure.
-      TopRPTracker.advance();
+      RegisterOperands RegOpers;
+      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+      if (ShouldTrackLaneMasks) {
+        // Adjust liveness and add missing dead+read-undef flags.
+        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+      } else {
+        // Adjust for missing dead-def flags.
+        RegOpers.detectDeadDefs(*MI, *LIS);
+      }
+
+      TopRPTracker.advance(RegOpers);
       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
       DEBUG(
         dbgs() << "Top Pressure:\n";
@@ -1248,8 +1305,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
 
       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
     }
-  }
-  else {
+  } else {
     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
     MachineBasicBlock::iterator priorII =
       priorNonDebug(CurrentBottom, CurrentTop);
@@ -1264,9 +1320,20 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
       CurrentBottom = MI;
     }
     if (ShouldTrackPressure) {
-      // Update bottom scheduled pressure.
-      SmallVector<unsigned, 8> LiveUses;
-      BotRPTracker.recede(&LiveUses);
+      RegisterOperands RegOpers;
+      RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+      if (ShouldTrackLaneMasks) {
+        // Adjust liveness and add missing dead+read-undef flags.
+        SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+        RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+      } else {
+        // Adjust for missing dead-def flags.
+        RegOpers.detectDeadDefs(*MI, *LIS);
+      }
+
+      BotRPTracker.recedeSkipDebugValues();
+      SmallVector<RegisterMaskPair, 8> LiveUses;
+      BotRPTracker.recede(RegOpers, &LiveUses);
       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
       DEBUG(
         dbgs() << "Bottom Pressure:\n";
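Both scheduling directions now hand the tracker an explicit RegisterOperands
summary instead of letting advance()/recede() re-parse the instruction. The
protocol, condensed from the hunks above (a sketch, not a drop-in):

    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks,
                     /*IgnoreDead=*/false);          // gather uses and defs
    if (ShouldTrackLaneMasks) {
      SlotIndex Idx = LIS->getInstructionIndex(*MI).getRegSlot();
      RegOpers.adjustLaneLiveness(*LIS, MRI, Idx, MI); // lane-precise flags
    } else {
      RegOpers.detectDeadDefs(*MI, *LIS);              // dead-def flags only
    }
    TopRPTracker.advance(RegOpers);  // or BotRPTracker.recede(RegOpers, ...)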
@@ -1280,64 +1347,81 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
 }
 
 //===----------------------------------------------------------------------===//
-// LoadClusterMutation - DAG post-processing to cluster loads.
+// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
 //===----------------------------------------------------------------------===//
 
 namespace {
 /// \brief Post-process the DAG to create cluster edges between neighboring
-/// loads.
-class LoadClusterMutation : public ScheduleDAGMutation {
-  struct LoadInfo {
+/// loads or between neighboring stores.
+class BaseMemOpClusterMutation : public ScheduleDAGMutation {
+  struct MemOpInfo {
     SUnit *SU;
     unsigned BaseReg;
-    unsigned Offset;
-    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
-      : SU(su), BaseReg(reg), Offset(ofs) {}
+    int64_t Offset;
+    MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
+        : SU(su), BaseReg(reg), Offset(ofs) {}
 
-    bool operator<(const LoadInfo &RHS) const {
+    bool operator<(const MemOpInfo&RHS) const {
       return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
     }
   };
 
   const TargetInstrInfo *TII;
   const TargetRegisterInfo *TRI;
+  bool IsLoad;
+
 public:
-  LoadClusterMutation(const TargetInstrInfo *tii,
-                      const TargetRegisterInfo *tri)
-    : TII(tii), TRI(tri) {}
+  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
+                           const TargetRegisterInfo *tri, bool IsLoad)
+      : TII(tii), TRI(tri), IsLoad(IsLoad) {}
+
+  void apply(ScheduleDAGInstrs *DAGInstrs) override;
 
-  void apply(ScheduleDAGMI *DAG) override;
 protected:
-  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
+  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
+};
+
+class StoreClusterMutation : public BaseMemOpClusterMutation {
+public:
+  StoreClusterMutation(const TargetInstrInfo *tii,
+                       const TargetRegisterInfo *tri)
+      : BaseMemOpClusterMutation(tii, tri, false) {}
+};
+
+class LoadClusterMutation : public BaseMemOpClusterMutation {
+public:
+  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
+      : BaseMemOpClusterMutation(tii, tri, true) {}
 };
 } // anonymous
 
-void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
-                                                  ScheduleDAGMI *DAG) {
-  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
-  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
-    SUnit *SU = Loads[Idx];
+void BaseMemOpClusterMutation::clusterNeighboringMemOps(
+    ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
+  SmallVector<MemOpInfo, 32> MemOpRecords;
+  for (unsigned Idx = 0, End = MemOps.size(); Idx != End; ++Idx) {
+    SUnit *SU = MemOps[Idx];
     unsigned BaseReg;
-    unsigned Offset;
-    if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
-      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
+    int64_t Offset;
+    if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
+      MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
   }
-  if (LoadRecords.size() < 2)
+  if (MemOpRecords.size() < 2)
     return;
-  std::sort(LoadRecords.begin(), LoadRecords.end());
+
+  std::sort(MemOpRecords.begin(), MemOpRecords.end());
   unsigned ClusterLength = 1;
-  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
-    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
+  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
+    if (MemOpRecords[Idx].BaseReg != MemOpRecords[Idx+1].BaseReg) {
       ClusterLength = 1;
       continue;
     }
 
-    SUnit *SUa = LoadRecords[Idx].SU;
-    SUnit *SUb = LoadRecords[Idx+1].SU;
-    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
-        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
-
-      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
+    SUnit *SUa = MemOpRecords[Idx].SU;
+    SUnit *SUb = MemOpRecords[Idx+1].SU;
+    if (TII->shouldClusterMemOps(*SUa->getInstr(), *SUb->getInstr(),
+                                 ClusterLength) &&
+        DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
+      DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
            << SUb->NodeNum << ")\n");
       // Copy successor edges from SUa to SUb. Interleaving computation
       // dependent on SUa can prevent load combining due to register reuse.
@@ -1351,22 +1435,26 @@ void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
       }
       ++ClusterLength;
-    }
-    else
+    } else
       ClusterLength = 1;
   }
 }
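clusterNeighboringMemOps is a classic sort-then-scan: records are ordered by
(BaseReg, Offset) via std::tie, and a cluster edge is only proposed between
records that remain adjacent after sorting and share a base register. The bare
skeleton of the technique, with illustrative types standing in for MemOpInfo:

    #include <algorithm>
    #include <cstdint>
    #include <tuple>
    #include <vector>

    struct Rec { unsigned Base; int64_t Off; };

    void ClusterSketch(std::vector<Rec> &Recs) {
      std::sort(Recs.begin(), Recs.end(), [](const Rec &A, const Rec &B) {
        return std::tie(A.Base, A.Off) < std::tie(B.Base, B.Off);
      });
      unsigned Len = 1;
      for (size_t I = 0; I + 1 < Recs.size(); ++I) {
        if (Recs[I].Base != Recs[I + 1].Base) { Len = 1; continue; }
        // Adjacent after sorting and same base: candidate pair. The real
        // code asks TII->shouldClusterMemOps(..., Len) before adding an edge.
        ++Len;
      }
    }

Sorting makes the scan O(n log n) overall while only ever comparing neighbors.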
 
 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
-void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
+
+  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
   // Map DAG NodeNum to store chain ID.
   DenseMap<unsigned, unsigned> StoreChainIDs;
-  // Map each store chain to a set of dependent loads.
+  // Map each store chain to a set of dependent MemOps.
   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
     SUnit *SU = &DAG->SUnits[Idx];
-    if (!SU->getInstr()->mayLoad())
+    if ((IsLoad && !SU->getInstr()->mayLoad()) ||
+        (!IsLoad && !SU->getInstr()->mayStore()))
       continue;
+
     unsigned ChainPredID = DAG->SUnits.size();
     for (SUnit::const_pred_iterator
            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
@@ -1376,7 +1464,7 @@ void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
       }
     }
     // Check if this chain-like pred has been seen
-    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
+    // before. ChainPredID==MaxNodeID at the top of the schedule.
     unsigned NumChains = StoreChainDependents.size();
     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
@@ -1384,9 +1472,10 @@ void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
       StoreChainDependents.resize(NumChains + 1);
     StoreChainDependents[Result.first->second].push_back(SU);
   }
+
   // Iterate over the store chains.
   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
-    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
+    clusterNeighboringMemOps(StoreChainDependents[Idx], DAG);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1403,7 +1492,7 @@ public:
   MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
     : TII(TII), TRI(TRI) {}
 
-  void apply(ScheduleDAGMI *DAG) override;
+  void apply(ScheduleDAGInstrs *DAGInstrs) override;
 };
 } // anonymous
 
@@ -1423,7 +1512,9 @@ static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
 
 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
 /// fused operations.
-void MacroFusion::apply(ScheduleDAGMI *DAG) {
+void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
   // For now, assume targets can only fuse with the branch.
   SUnit &ExitSU = DAG->ExitSU;
   MachineInstr *Branch = ExitSU.getInstr();
@@ -1439,7 +1530,7 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
     if (!HasDataDep(TRI, *Branch, *Pred))
       continue;
 
-    if (!TII.shouldScheduleAdjacent(Pred, Branch))
+    if (!TII.shouldScheduleAdjacent(*Pred, *Branch))
       continue;
 
     // Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1474,7 +1565,7 @@ class CopyConstrain : public ScheduleDAGMutation {
 public:
   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
 
-  void apply(ScheduleDAGMI *DAG) override;
+  void apply(ScheduleDAGInstrs *DAGInstrs) override;
 
 protected:
   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
@@ -1505,12 +1596,14 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
   MachineInstr *Copy = CopySU->getInstr();
 
   // Check for pure vreg copies.
-  unsigned SrcReg = Copy->getOperand(1).getReg();
-  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+  const MachineOperand &SrcOp = Copy->getOperand(1);
+  unsigned SrcReg = SrcOp.getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
     return;
 
-  unsigned DstReg = Copy->getOperand(0).getReg();
-  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+  const MachineOperand &DstOp = Copy->getOperand(0);
+  unsigned DstReg = DstOp.getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
    return;
 
   // Check if either the dest or source is local. If it's live across a back
@@ -1627,15 +1720,16 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
 
 /// \brief Callback from DAG postProcessing to create weak edges to encourage
 /// copy elimination.
-void CopyConstrain::apply(ScheduleDAGMI *DAG) {
+void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
+  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
 
   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
   if (FirstPos == DAG->end())
     return;
-  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
+  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
-    &*priorNonDebug(DAG->end(), DAG->begin()));
+      *priorNonDebug(DAG->end(), DAG->begin()));
 
   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
     SUnit *SU = &DAG->SUnits[Idx];
@@ -1862,7 +1956,8 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
   // Check for interlocks first. For the purpose of other heuristics, an
   // instruction that cannot issue appears as if it's not in the ReadyQueue.
   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
-  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
+  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
+      Available.size() >= ReadyListLimit)
     Pending.push(SU);
   else
     Available.push(SU);
@@ -1905,8 +2000,7 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {
   if (!HazardRec->isEnabled()) {
     // Bypass HazardRec virtual calls.
     CurrCycle = NextCycle;
-  }
-  else {
+  } else {
     // Bypass getHazardType calls in case of long latency.
     for (; CurrCycle != NextCycle; ++CurrCycle) {
       if (isTop())
@@ -2074,8 +2168,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
   // If we stall for any reason, bump the cycle.
   if (NextCycle > CurrCycle) {
     bumpCycle(NextCycle);
-  }
-  else {
+  } else {
     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
     // resource limited. If a stall occurred, bumpCycle does this.
     unsigned LFactor = SchedModel->getLatencyFactor();
@@ -2119,11 +2212,13 @@ void SchedBoundary::releasePending() {
     if (checkHazard(SU))
       continue;
 
+    if (Available.size() >= ReadyListLimit)
+      break;
+
     Available.push(SU);
     Pending.remove(Pending.begin()+i);
     --i; --e;
   }
-  DEBUG(if (!Pending.empty()) Pending.dump());
   CheckPending = false;
 }
 
@@ -2163,6 +2258,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
       bumpCycle(CurrCycle + 1);
       releasePending();
     }
+
+  DEBUG(Pending.dump());
+  DEBUG(Available.dump());
+
   if (Available.size() == 1)
     return *Available.begin();
   return nullptr;
@@ -2177,8 +2276,7 @@ void SchedBoundary::dumpScheduledState() {
   if (ZoneCritResIdx) {
     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
     ResCount = getResourceCount(ZoneCritResIdx);
-  }
-  else {
+  } else {
     ResFactor = SchedModel->getMicroOpFactor();
     ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
   }
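The new ReadyListLimit flag caps Available at a fixed size: releaseNode and
releasePending divert the overflow into Pending, which keeps every pick-loop
scan over the ready list O(limit) instead of O(block size) in pathologically
large blocks. The control flow in miniature (hedged; mirrors the hunks above):

    // Overflow stays in Pending and is retried by releasePending() once
    // slots free up; nothing is dropped, only deferred.
    if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
        Available.size() >= ReadyListLimit)
      Pending.push(SU);   // not schedulable now, or the ready list is full
    else
      Available.push(SU);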
@@ -2218,8 +2316,7 @@ initResourceDelta(const ScheduleDAGMI *DAG,
 
 /// Set the CandPolicy given a scheduling zone given the current resources and
 /// latencies inside and outside the zone.
-void GenericSchedulerBase::setPolicy(CandPolicy &Policy,
-                                     bool IsPostRA,
+void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
                                      SchedBoundary &CurrZone,
                                      SchedBoundary *OtherZone) {
   // Apply preemptive heuristics based on the total latency and resources
@@ -2295,7 +2392,8 @@ const char *GenericSchedulerBase::getReasonStr(
   GenericSchedulerBase::CandReason Reason) {
   switch (Reason) {
   case NoCand:         return "NOCAND    ";
-  case PhysRegCopy:    return "PREG-COPY";
+  case Only1:          return "ONLY1     ";
+  case PhysRegCopy:    return "PREG-COPY ";
   case RegExcess:      return "REG-EXCESS";
   case RegCritical:    return "REG-CRIT  ";
   case Stall:          return "STALL     ";
@@ -2381,7 +2479,6 @@ static bool tryLess(int TryVal, int CandVal,
     Cand.Reason = Reason;
     return true;
   }
-  Cand.setRepeat(Reason);
   return false;
 }
 
@@ -2398,7 +2495,6 @@ static bool tryGreater(int TryVal, int CandVal,
     Cand.Reason = Reason;
     return true;
   }
-  Cand.setRepeat(Reason);
   return false;
 }
 
@@ -2414,8 +2510,7 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
       return true;
-  }
-  else {
+  } else {
     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
@@ -2428,10 +2523,13 @@ static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
   return false;
 }
 
-static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
-                      bool IsTop) {
+static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
-        << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
+        << GenericSchedulerBase::getReasonStr(Reason) << '\n');
+}
+
+static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
+  tracePick(Cand.Reason, Cand.AtTop);
 }
 
 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
@@ -2460,6 +2558,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
             Itin, DAG);
   }
+  TopCand.SU = nullptr;
+  BotCand.SU = nullptr;
 }
 
 /// Initialize the per-region scheduling policy.
@@ -2487,8 +2587,7 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
   RegionPolicy.OnlyBottomUp = true;
 
   // Allow the subtarget to override default policy.
-  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End,
-                                        NumRegionInstrs);
+  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
 
   // After subtarget overrides, apply command line options.
   if (!EnableRegPressure)
@@ -2582,19 +2681,25 @@ static bool tryPressure(const PressureChange &TryP,
                         GenericSchedulerBase::CandReason Reason,
                         const TargetRegisterInfo *TRI,
                         const MachineFunction &MF) {
-  unsigned TryPSet = TryP.getPSetOrMax();
-  unsigned CandPSet = CandP.getPSetOrMax();
-  // If both candidates affect the same set, go with the smallest increase.
-  if (TryPSet == CandPSet) {
-    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
-                   Reason);
-  }
   // If one candidate decreases and the other increases, go with it.
   // Invalid candidates have UnitInc==0.
   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
                  Reason)) {
     return true;
   }
+  // Do not compare the magnitude of pressure changes between top and bottom
+  // boundary.
+  // Do not compare the magnitude of pressure changes between top and bottom
+  // boundary.
+  if (Cand.AtTop != TryCand.AtTop)
+    return false;
+
+  // If both candidates affect the same set in the same boundary, go with the
+  // smallest increase.
+  unsigned TryPSet = TryP.getPSetOrMax();
+  unsigned CandPSet = CandP.getPSetOrMax();
+  if (TryPSet == CandPSet) {
+    return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
+                   Reason);
+  }
   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
                                  std::numeric_limits<int>::max();
@@ -2640,64 +2745,64 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
   return 0;
 }

-/// Apply a set of heuristics to a new candidate. Heuristics are currently
-/// hierarchical. This may be more efficient than a graduated cost model because
-/// we don't need to evaluate all aspects of the model for each node in the
-/// queue. But it's really done to make the heuristics easier to debug and
-/// statistically analyze.
-///
-/// \param Cand provides the policy and current best candidate.
-/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
-/// \param Zone describes the scheduled zone that we are extending.
-/// \param RPTracker describes reg pressure within the scheduled zone.
-/// \param TempTracker is a scratch pressure tracker to reuse in queries.
-void GenericScheduler::tryCandidate(SchedCandidate &Cand,
-                                    SchedCandidate &TryCand,
-                                    SchedBoundary &Zone,
-                                    const RegPressureTracker &RPTracker,
-                                    RegPressureTracker &TempTracker) {
-
+void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
+                                     bool AtTop,
+                                     const RegPressureTracker &RPTracker,
+                                     RegPressureTracker &TempTracker) {
+  Cand.SU = SU;
+  Cand.AtTop = AtTop;
   if (DAG->isTrackingPressure()) {
-    // Always initialize TryCand's RPDelta.
-    if (Zone.isTop()) {
+    if (AtTop) {
       TempTracker.getMaxDownwardPressureDelta(
-        TryCand.SU->getInstr(),
-        TryCand.RPDelta,
+        Cand.SU->getInstr(),
+        Cand.RPDelta,
         DAG->getRegionCriticalPSets(),
         DAG->getRegPressure().MaxSetPressure);
-    }
-    else {
+    } else {
       if (VerifyScheduling) {
         TempTracker.getMaxUpwardPressureDelta(
-          TryCand.SU->getInstr(),
-          &DAG->getPressureDiff(TryCand.SU),
-          TryCand.RPDelta,
+          Cand.SU->getInstr(),
+          &DAG->getPressureDiff(Cand.SU),
+          Cand.RPDelta,
           DAG->getRegionCriticalPSets(),
           DAG->getRegPressure().MaxSetPressure);
-      }
-      else {
+      } else {
         RPTracker.getUpwardPressureDelta(
-          TryCand.SU->getInstr(),
-          DAG->getPressureDiff(TryCand.SU),
-          TryCand.RPDelta,
+          Cand.SU->getInstr(),
+          DAG->getPressureDiff(Cand.SU),
+          Cand.RPDelta,
           DAG->getRegionCriticalPSets(),
           DAG->getRegPressure().MaxSetPressure);
       }
     }
   }
-  DEBUG(if (TryCand.RPDelta.Excess.isValid())
-          dbgs() << "  Try  SU(" << TryCand.SU->NodeNum << ") "
-                 << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
-                 << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
+  DEBUG(if (Cand.RPDelta.Excess.isValid())
+          dbgs() << "  Try  SU(" << Cand.SU->NodeNum << ") "
+                 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet())
+                 << ":" << Cand.RPDelta.Excess.getUnitInc() << "\n");
+}
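The new initCandidate measures register-pressure impact in the direction a candidate would extend the schedule, which is also why the pressure guard above refuses to compare magnitudes across boundaries. A small standalone sketch with invented types (RegionTracker is not the real RegPressureTracker):

// Top candidates are measured with a downward query, bottom candidates with
// an upward query; the AtTop flag is remembered so later comparisons can stay
// per-boundary.
struct PressureDelta { int ExcessUnits = 0; };

struct RegionTracker {
  PressureDelta downward(unsigned) const { return {1}; }  // append at top
  PressureDelta upward(unsigned) const { return {-1}; }   // append at bottom
};

struct Candidate {
  unsigned NodeNum = 0;
  bool AtTop = false;
  PressureDelta RPDelta;
};

void initCandidate(Candidate &C, unsigned NodeNum, bool AtTop,
                   const RegionTracker &T) {
  C.NodeNum = NodeNum;
  C.AtTop = AtTop; // the two query directions are not mutually comparable
  C.RPDelta = AtTop ? T.downward(NodeNum) : T.upward(NodeNum);
}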
+/// Apply a set of heuristics to a new candidate. Heuristics are currently
+/// hierarchical. This may be more efficient than a graduated cost model because
+/// we don't need to evaluate all aspects of the model for each node in the
+/// queue. But it's really done to make the heuristics easier to debug and
+/// statistically analyze.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending, or nullptr
+/// if Cand is from a different zone than TryCand.
+void GenericScheduler::tryCandidate(SchedCandidate &Cand,
+                                    SchedCandidate &TryCand,
+                                    SchedBoundary *Zone) {
   // Initialize the candidate if needed.
   if (!Cand.isValid()) {
     TryCand.Reason = NodeOrder;
     return;
   }

-  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
-                 biasPhysRegCopy(Cand.SU, Zone.isTop()),
+  if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
+                 biasPhysRegCopy(Cand.SU, Cand.AtTop),
                  TryCand, Cand, PhysRegCopy))
     return;

@@ -2715,17 +2820,26 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
                   DAG->MF))
     return;

-  // For loops that are acyclic path limited, aggressively schedule for latency.
-  // This can result in very long dependence chains scheduled in sequence, so
-  // once every cycle (when CurrMOps == 0), switch to normal heuristics.
-  if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps()
-      && tryLatency(TryCand, Cand, Zone))
-    return;
+  // We only compare a subset of features when comparing nodes between
+  // Top and Bottom boundary. Some properties are simply incomparable; in many
+  // other instances we should only override the other boundary if something
+  // is a clear good pick on one boundary. Skip heuristics that are more
+  // "tie-breaking" in nature.
+  bool SameBoundary = Zone != nullptr;
+  if (SameBoundary) {
+    // For loops that are acyclic path limited, aggressively schedule for
+    // latency. This can result in very long dependence chains scheduled in
+    // sequence, so once every cycle (when CurrMOps == 0), switch to normal
+    // heuristics.
+    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+        tryLatency(TryCand, Cand, *Zone))
+      return;

-  // Prioritize instructions that read unbuffered resources by stall cycles.
-  if (tryLess(Zone.getLatencyStallCycles(TryCand.SU),
-              Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
-    return;
+    // Prioritize instructions that read unbuffered resources by stall cycles.
+    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+      return;
+  }

   // Keep clustered nodes together to encourage downstream peephole
   // optimizations which may reduce resource requirements.
@@ -2733,18 +2847,23 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
   // This is a best effort to set things up for a post-RA pass. Optimizations
   // like generating loads of multiple registers should ideally be done within
   // the scheduler pass by combining the loads during DAG postprocessing.
-  const SUnit *NextClusterSU =
-    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
-  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
+  const SUnit *CandNextClusterSU =
+    Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  const SUnit *TryCandNextClusterSU =
+    TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+                 Cand.SU == CandNextClusterSU,
                  TryCand, Cand, Cluster))
     return;
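The SameBoundary flag gates the heuristics that only make sense for two candidates taken from the same end of the region. A compressed illustration of that gating (types are hypothetical; the real chain runs many more steps on both sides of the gate):

// Cross-boundary comparisons (Z == nullptr) skip the tie-breaking steps and
// run only heuristics whose results are meaningful for nodes taken from
// opposite ends of the region.
struct BoundaryZone { /* per-boundary scheduling state */ };

struct TrialCand { int Stall = 0; bool AtTop = false; };

bool betterCandidate(const TrialCand &Try, const TrialCand &Best,
                     const BoundaryZone *Z) {
  bool SameBoundary = Z != nullptr;
  // Stall cycles are a per-boundary tie-breaker, so only compare them when
  // both candidates extend the same boundary.
  if (SameBoundary && Try.Stall != Best.Stall)
    return Try.Stall < Best.Stall;
  // ... boundary-safe heuristics (pressure excess, clustering) run
  // unconditionally before and after this point ...
  return false;
}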
+  if (SameBoundary) {
+    // Weak edges are for clustering and other constraints.
+    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+                getWeakLeft(Cand.SU, Cand.AtTop),
+                TryCand, Cand, Weak))
+      return;
   }
+
   // Avoid increasing the max pressure of the entire region.
   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
                                                Cand.RPDelta.CurrentMax,
                                                TryCand, Cand, RegMax, TRI,
                                                DAG->MF))
     return;

-  // Avoid critical resource consumption and balance the schedule.
-  TryCand.initResourceDelta(DAG, SchedModel);
-  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
-              TryCand, Cand, ResourceReduce))
-    return;
-  if (tryGreater(TryCand.ResDelta.DemandedResources,
-                 Cand.ResDelta.DemandedResources,
-                 TryCand, Cand, ResourceDemand))
-    return;
+  if (SameBoundary) {
+    // Avoid critical resource consumption and balance the schedule.
+    TryCand.initResourceDelta(DAG, SchedModel);
+    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+                TryCand, Cand, ResourceReduce))
+      return;
+    if (tryGreater(TryCand.ResDelta.DemandedResources,
+                   Cand.ResDelta.DemandedResources,
+                   TryCand, Cand, ResourceDemand))
+      return;

-  // Avoid serializing long latency dependence chains.
-  // For acyclic path limited loops, latency was already checked above.
-  if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency &&
-      !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) {
-    return;
-  }
+    // Avoid serializing long latency dependence chains.
+    // For acyclic path limited loops, latency was already checked above.
+    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+      return;

-  // Prefer immediate defs/users of the last scheduled instruction. This is a
-  // local pressure avoidance strategy that also makes the machine code
-  // readable.
-  if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU),
-                 TryCand, Cand, NextDefUse))
-    return;
+    // Prefer immediate defs/users of the last scheduled instruction. This is a
+    // local pressure avoidance strategy that also makes the machine code
+    // readable.
+    if (tryGreater(Zone->isNextSU(TryCand.SU), Zone->isNextSU(Cand.SU),
+                   TryCand, Cand, NextDefUse))
+      return;

-  // Fall through to original instruction order.
-  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
-      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
-    TryCand.Reason = NodeOrder;
+    // Fall through to original instruction order.
+    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
+        || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
+      TryCand.Reason = NodeOrder;
+    }
   }
 }

@@ -2789,20 +2909,20 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
 /// DAG building. To adjust for the current scheduling location we need to
 /// maintain the number of vreg uses remaining to be top-scheduled.
 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+                                         const CandPolicy &ZonePolicy,
                                          const RegPressureTracker &RPTracker,
                                          SchedCandidate &Cand) {
-  ReadyQueue &Q = Zone.Available;
-
-  DEBUG(Q.dump());
-
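The queue scan that follows wraps every ready node in a trial candidate and compares it against the running best, passing the zone through only when both candidates belong to it. A self-contained sketch of the loop shape (types and the comparison callback are invented for illustration):

#include <vector>

struct SketchSU { unsigned NodeNum; };
struct SketchCand {
  SketchSU *Node = nullptr;
  bool AtTop = false;
  bool valid() const { return Node != nullptr; }
};

// tryCandidate(Best, Try, sameZone) returns true when Try should replace Best.
template <typename TryFn>
void scanQueue(std::vector<SketchSU *> &Ready, bool ZoneIsTop,
               SketchCand &Best, TryFn tryCandidate) {
  for (SketchSU *N : Ready) {
    SketchCand Try{N, ZoneIsTop};
    // Only same-boundary pairs may use the per-zone tie-breakers.
    bool SameZone = !Best.valid() || Best.AtTop == Try.AtTop;
    if (tryCandidate(Best, Try, SameZone))
      Best = Try;
  }
}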
   // getMaxPressureDelta temporarily modifies the tracker.
   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

+  ReadyQueue &Q = Zone.Available;
   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
-    SchedCandidate TryCand(Cand.Policy);
-    TryCand.SU = *I;
-    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
+    SchedCandidate TryCand(ZonePolicy);
+    initCandidate(TryCand, *I, Zone.isTop(), RPTracker, TempTracker);
+    // Pass SchedBoundary only when comparing nodes from the same boundary.
+    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+    tryCandidate(Cand, TryCand, ZoneArg);
     if (TryCand.Reason != NoCand) {
       // Initialize resource delta if needed in case future heuristics query it.
       if (TryCand.ResDelta == SchedResourceDelta())
@@ -2819,57 +2939,77 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
   // efficient, but also provides the best heuristics for CriticalPSets.
   if (SUnit *SU = Bot.pickOnlyChoice()) {
     IsTopNode = false;
-    DEBUG(dbgs() << "Pick Bot ONLY1\n");
+    tracePick(Only1, false);
     return SU;
   }
   if (SUnit *SU = Top.pickOnlyChoice()) {
     IsTopNode = true;
-    DEBUG(dbgs() << "Pick Top ONLY1\n");
+    tracePick(Only1, true);
     return SU;
   }
-  CandPolicy NoPolicy;
-  SchedCandidate BotCand(NoPolicy);
-  SchedCandidate TopCand(NoPolicy);
   // Set the bottom-up policy based on the state of the current bottom zone and
   // the instructions outside the zone, including the top zone.
-  setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top);
+  CandPolicy BotPolicy;
+  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
   // Set the top-down policy based on the state of the current top zone and
   // the instructions outside the zone, including the bottom zone.
-  setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot);
-
-  // Prefer bottom scheduling when heuristics are silent.
-  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
-  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
-
-  // If either Q has a single candidate that provides the least increase in
-  // Excess pressure, we can immediately schedule from that Q.
-  //
-  // RegionCriticalPSets summarizes the pressure within the scheduled region and
-  // affects picking from either Q. If scheduling in one direction must
-  // increase pressure for one of the excess PSets, then schedule in that
-  // direction first to provide more freedom in the other direction.
-  if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
-      || (BotCand.Reason == RegCritical
-          && !BotCand.isRepeat(RegCritical)))
-  {
-    IsTopNode = false;
-    tracePick(BotCand, IsTopNode);
-    return BotCand.SU;
+  CandPolicy TopPolicy;
+  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
+
+  // See if BotCand is still valid (because we previously scheduled from Top).
+  DEBUG(dbgs() << "Picking from Bot:\n");
+  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
+      BotCand.Policy != BotPolicy) {
+    BotCand.reset(CandPolicy());
+    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
+    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+  } else {
+    DEBUG(traceCandidate(BotCand));
+#ifndef NDEBUG
+    if (VerifyScheduling) {
+      SchedCandidate TCand;
+      TCand.reset(CandPolicy());
+      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
+      assert(TCand.SU == BotCand.SU &&
+             "Last pick result should correspond to re-picking right now");
+    }
+#endif
   }
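The bidirectional picker now caches BotCand and TopCand across calls and only rescans a queue when the cached pick was invalidated; under VerifyScheduling a fresh scan double-checks the cache. A minimal sketch of that memoization idea with hypothetical names:

// The cached pick is reused as long as nothing invalidated it; the debug
// path recomputes from scratch and asserts that the cache agrees.
#include <cassert>

struct CachedPick {
  int Node = -1;
  bool valid() const { return Node >= 0; }
};

int pickWithCache(CachedPick &Cache, bool Invalidated, int (*recompute)()) {
  if (!Cache.valid() || Invalidated) {
    Cache.Node = recompute(); // slow path: scan the ready queue
  } else {
#ifndef NDEBUG
    // Optional verification, mirroring VerifyScheduling above.
    assert(recompute() == Cache.Node && "stale cached pick");
#endif
  }
  return Cache.Node;
}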
   // Check if the top Q has a better candidate.
-  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
-  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+  DEBUG(dbgs() << "Picking from Top:\n");
+  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
+      TopCand.Policy != TopPolicy) {
+    TopCand.reset(CandPolicy());
+    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
+    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+  } else {
+    DEBUG(traceCandidate(TopCand));
+#ifndef NDEBUG
+    if (VerifyScheduling) {
+      SchedCandidate TCand;
+      TCand.reset(CandPolicy());
+      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
+      assert(TCand.SU == TopCand.SU &&
+             "Last pick result should correspond to re-picking right now");
    }
+#endif
+  }

-  // Choose the queue with the most important (lowest enum) reason.
-  if (TopCand.Reason < BotCand.Reason) {
-    IsTopNode = true;
-    tracePick(TopCand, IsTopNode);
-    return TopCand.SU;
+  // Pick best from BotCand and TopCand.
+  assert(BotCand.isValid());
+  assert(TopCand.isValid());
+  SchedCandidate Cand = BotCand;
+  TopCand.Reason = NoCand;
+  tryCandidate(Cand, TopCand, nullptr);
+  if (TopCand.Reason != NoCand) {
+    Cand.setBest(TopCand);
+    DEBUG(traceCandidate(Cand));
   }
-  // Otherwise prefer the bottom candidate, in node order if all else failed.
-  IsTopNode = false;
-  tracePick(BotCand, IsTopNode);
-  return BotCand.SU;
+
+  IsTopNode = Cand.AtTop;
+  tracePick(Cand);
+  return Cand.SU;
 }

 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
@@ -2885,27 +3025,25 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
       SU = Top.pickOnlyChoice();
       if (!SU) {
         CandPolicy NoPolicy;
-        SchedCandidate TopCand(NoPolicy);
-        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
+        TopCand.reset(NoPolicy);
+        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
         assert(TopCand.Reason != NoCand && "failed to find a candidate");
-        tracePick(TopCand, true);
+        tracePick(TopCand);
         SU = TopCand.SU;
       }
       IsTopNode = true;
-    }
-    else if (RegionPolicy.OnlyBottomUp) {
+    } else if (RegionPolicy.OnlyBottomUp) {
       SU = Bot.pickOnlyChoice();
       if (!SU) {
         CandPolicy NoPolicy;
-        SchedCandidate BotCand(NoPolicy);
-        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
+        BotCand.reset(NoPolicy);
+        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
         assert(BotCand.Reason != NoCand && "failed to find a candidate");
-        tracePick(BotCand, false);
+        tracePick(BotCand);
         SU = BotCand.SU;
       }
       IsTopNode = false;
-    }
-    else {
+    } else {
       SU = pickNodeBidirectional(IsTopNode);
     }
   } while (SU->isScheduled);
@@ -2957,8 +3095,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
     Top.bumpNode(SU);
     if (SU->hasPhysRegUses)
       reschedulePhysRegCopies(SU, true);
-  }
-  else {
+  } else {
     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
     Bot.bumpNode(SU);
     if (SU->hasPhysRegDefs)
@@ -2976,8 +3113,12 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
   // data and pass it to later mutations. Have a single mutation that gathers
   // the interesting nodes in one pass.
   DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
-  if (EnableLoadCluster && DAG->TII->enableClusterLoads())
-    DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+  if (EnableMemOpCluster) {
+    if (DAG->TII->enableClusterLoads())
+      DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+    if (DAG->TII->enableClusterStores())
+      DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
+  }
   if (EnableMacroFusion)
     DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
   return DAG;
@@ -3065,12 +3206,10 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,

 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
   ReadyQueue &Q = Top.Available;
-
-  DEBUG(Q.dump());
-
   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
     SchedCandidate TryCand(Cand.Policy);
     TryCand.SU = *I;
+    TryCand.AtTop = true;
     TryCand.initResourceDelta(DAG, SchedModel);
     tryCandidate(Cand, TryCand);
     if (TryCand.Reason != NoCand) {
@@ -3089,7 +3228,9 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
   SUnit *SU;
   do {
     SU = Top.pickOnlyChoice();
-    if (!SU) {
+    if (SU) {
+      tracePick(Only1, true);
+    } else {
       CandPolicy NoPolicy;
       SchedCandidate TopCand(NoPolicy);
       // Set the top-down policy based on the state of the current top zone and
@@ -3097,7 +3238,7 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
       pickNodeFromQueue(TopCand);
       assert(TopCand.Reason != NoCand && "failed to find a candidate");
-      tracePick(TopCand, true);
+      tracePick(TopCand);
       SU = TopCand.SU;
     }
   } while (SU->isScheduled);
@@ -3285,8 +3426,7 @@ public:
       TopQ.pop();
     } while (SU->isScheduled);
     IsTopNode = true;
-  }
-  else {
+  } else {
     do {
       if (BottomQ.empty()) return nullptr;
       SU = BottomQ.top();
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 5e6d619..571a5c1 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -104,7 +105,7 @@ namespace {
   private:
     bool ProcessBlock(MachineBasicBlock &MBB);
-    bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+    bool isWorthBreakingCriticalEdge(MachineInstr &MI,
                                      MachineBasicBlock *From,
                                      MachineBasicBlock *To);
     /// \brief Postpone the splitting of the given critical
@@ -119,27 +120,27 @@ namespace {
     ///
     /// \return True if the edge is marked as toSplit, false otherwise.
     /// False can be returned if, for instance, this is not profitable.
-    bool PostponeSplitCriticalEdge(MachineInstr *MI,
+    bool PostponeSplitCriticalEdge(MachineInstr &MI,
                                    MachineBasicBlock *From,
                                    MachineBasicBlock *To,
                                    bool BreakPHIEdge);
-    bool SinkInstruction(MachineInstr *MI, bool &SawStore,
+    bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                          AllSuccsCache &AllSuccessors);
     bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
                                  MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
                                  bool &LocalUse) const;
-    MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB,
+    MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                         bool &BreakPHIEdge,
                                         AllSuccsCache &AllSuccessors);
-    bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+    bool isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
                               MachineBasicBlock *MBB,
                               MachineBasicBlock *SuccToSinkTo,
                               AllSuccsCache &AllSuccessors);
-    bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+    bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                          MachineBasicBlock *MBB);
     SmallVector<MachineBasicBlock *, 4> &
-    GetAllSortedSuccessors(MachineInstr *MI, MachineBasicBlock *MBB,
+    GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                            AllSuccsCache &AllSuccessors) const;
   };
 } // end anonymous namespace
@@ -154,13 +155,13 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(MachineSinking, "machine-sink", "Machine code sinking",
                     false, false)

-bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
                                                      MachineBasicBlock *MBB) {
-  if (!MI->isCopy())
+  if (!MI.isCopy())
     return false;

-  unsigned SrcReg = MI->getOperand(1).getReg();
-  unsigned DstReg = MI->getOperand(0).getReg();
+  unsigned SrcReg = MI.getOperand(1).getReg();
+  unsigned DstReg = MI.getOperand(0).getReg();
   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
       !TargetRegisterInfo::isVirtualRegister(DstReg) ||
       !MRI->hasOneNonDBGUse(SrcReg))
@@ -175,9 +176,9 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
   if (DefMI->isCopyLike())
     return false;
   DEBUG(dbgs() << "Coalescing: " << *DefMI);
-  DEBUG(dbgs() << "*** to: " << *MI);
+  DEBUG(dbgs() << "*** to: " << MI);
   MRI->replaceRegWith(DstReg, SrcReg);
-  MI->eraseFromParent();
+  MI.eraseFromParent();

   // Conservatively, clear any kill flags, since it's possible that they are no
   // longer correct.
@@ -256,7 +257,7 @@
 }

 bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
-  if (skipOptnoneFunction(*MF.getFunction()))
+  if (skipFunction(*MF.getFunction()))
     return false;

   DEBUG(dbgs() << "******** Machine Sinking ********\n");
@@ -283,7 +284,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
     // If we have anything we marked as toSplit, split it now.
     for (auto &Pair : ToSplit) {
-      auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, this);
+      auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
       if (NewSucc != nullptr) {
         DEBUG(dbgs() << " *** Splitting critical edge:"
               " BB#" << Pair.first->getNumber()
@@ -326,7 +327,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
   --I;
   bool ProcessedBegin, SawStore = false;
   do {
-    MachineInstr *MI = I;  // The instruction to sink.
+    MachineInstr &MI = *I;  // The instruction to sink.

     // Predecrement I (if it's not begin) so that it isn't invalidated by
     // sinking.
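ProcessBlock walks the block backwards and moves the iterator off the current instruction before that instruction can be sunk or erased, so the iterator is never invalidated. The same idiom, reduced to a self-contained std::list example (a sketch, not the LLVM iterator types):

#include <list>

// Erase all even elements while scanning backwards; mirrors the
// predecrement idiom: step I away from the element before mutating it.
void eraseEvensBackwards(std::list<int> &L) {
  if (L.empty())
    return;
  auto I = L.end();
  --I;
  bool ProcessedBegin;
  do {
    auto Cur = I;                     // element to process this iteration
    ProcessedBegin = (I == L.begin());
    if (!ProcessedBegin)
      --I;                            // predecrement: survives erase(Cur)
    if (*Cur % 2 == 0)
      L.erase(Cur);                   // safe: I no longer references Cur
  } while (!ProcessedBegin);
}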
@@ -334,7 +335,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
     if (!ProcessedBegin)
       --I;

-    if (MI->isDebugValue())
+    if (MI.isDebugValue())
       continue;

     bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
@@ -343,8 +344,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
       continue;
     }

-    if (SinkInstruction(MI, SawStore, AllSuccessors))
-      ++NumSunk, MadeChange = true;
+    if (SinkInstruction(MI, SawStore, AllSuccessors)) {
+      ++NumSunk;
+      MadeChange = true;
+    }

     // If we just processed the first instruction in the block, we're done.
   } while (!ProcessedBegin);
@@ -352,7 +355,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
   return MadeChange;
 }

-bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
                                                  MachineBasicBlock *From,
                                                  MachineBasicBlock *To) {
   // FIXME: Need much better heuristics.
@@ -363,14 +366,14 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
   if (!CEBCandidates.insert(std::make_pair(From, To)).second)
     return true;

-  if (!MI->isCopy() && !TII->isAsCheapAsAMove(MI))
+  if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
     return true;

   // MI is cheap, we probably don't want to break the critical edge for it.
   // However, if this would allow some definitions of its source operands
   // to be sunk then it's probably worth it.
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isUse())
       continue;
     unsigned Reg = MO.getReg();
@@ -391,7 +394,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
       // If definition resides elsewhere, we aren't
       // blocking it from being sunk so don't break the edge.
       MachineInstr *DefMI = MRI->getVRegDef(Reg);
-      if (DefMI->getParent() == MI->getParent())
+      if (DefMI->getParent() == MI.getParent())
         return true;
     }
   }
@@ -399,7 +402,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
   return false;
 }

-bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr *MI,
+bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
                                                MachineBasicBlock *FromBB,
                                                MachineBasicBlock *ToBB,
                                                bool BreakPHIEdge) {
@@ -469,35 +472,30 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
   return true;
 }

-static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
-  return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
-}
-
 /// collectDebugValues - Scan instructions following MI and collect any
 /// matching DBG_VALUEs.
-static void collectDebugValues(MachineInstr *MI,
+static void collectDebugValues(MachineInstr &MI,
                                SmallVectorImpl<MachineInstr *> &DbgValues) {
   DbgValues.clear();
-  if (!MI->getOperand(0).isReg())
+  if (!MI.getOperand(0).isReg())
     return;

   MachineBasicBlock::iterator DI = MI; ++DI;
-  for (MachineBasicBlock::iterator DE = MI->getParent()->end();
+  for (MachineBasicBlock::iterator DE = MI.getParent()->end();
        DI != DE; ++DI) {
     if (!DI->isDebugValue())
       return;
     if (DI->getOperand(0).isReg() &&
-        DI->getOperand(0).getReg() == MI->getOperand(0).getReg())
-      DbgValues.push_back(DI);
+        DI->getOperand(0).getReg() == MI.getOperand(0).getReg())
+      DbgValues.push_back(&*DI);
   }
 }
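collectDebugValues relies on DBG_VALUE users trailing their def contiguously: it walks forward from MI and stops at the first non-debug instruction. A simplified sketch of that scan over an invented flat-vector block representation:

#include <vector>
#include <cstddef>

struct SketchInst { bool IsDebugValue; int UsedReg; };

// Gather the contiguous run of debug values after MIIdx that reference
// DefReg; these must be moved along with the instruction when it is sunk.
void collectDebugUsers(const std::vector<SketchInst> &Block, size_t MIIdx,
                       int DefReg, std::vector<size_t> &DbgUsers) {
  DbgUsers.clear();
  for (size_t I = MIIdx + 1; I < Block.size(); ++I) {
    if (!Block[I].IsDebugValue)
      return; // debug values trail the def contiguously
    if (Block[I].UsedReg == DefReg)
      DbgUsers.push_back(I);
  }
}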
 /// isProfitableToSinkTo - Return true if it is profitable to sink MI.
-bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
                                           MachineBasicBlock *MBB,
                                           MachineBasicBlock *SuccToSinkTo,
                                           AllSuccsCache &AllSuccessors) {
-  assert (MI && "Invalid MachineInstr!");
   assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");

   if (MBB == SuccToSinkTo)
@@ -538,7 +536,7 @@

 /// Get the sorted sequence of successors for this MachineBasicBlock, possibly
 /// computing it if it was not already cached.
 SmallVector<MachineBasicBlock *, 4> &
-MachineSinking::GetAllSortedSuccessors(MachineInstr *MI, MachineBasicBlock *MBB,
+MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                        AllSuccsCache &AllSuccessors) const {

   // Do we have the sorted successors in cache ?
@@ -560,7 +558,7 @@
       DT->getNode(MBB)->getChildren();
   for (const auto &DTChild : Children)
     // DomTree children of MBB that have MBB as immediate dominator are added.
-    if (DTChild->getIDom()->getBlock() == MI->getParent() &&
+    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
         // Skip MBBs already added to the AllSuccs vector above.
         !MBB->isSuccessor(DTChild->getBlock()))
       AllSuccs.push_back(DTChild->getBlock());
@@ -582,12 +580,10 @@
 }

 /// FindSuccToSinkTo - Find a successor to sink this instruction to.
-MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
-                                                    MachineBasicBlock *MBB,
-                                                    bool &BreakPHIEdge,
-                                                    AllSuccsCache &AllSuccessors) {
-
-  assert (MI && "Invalid MachineInstr!");
+MachineBasicBlock *
+MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
+                                 bool &BreakPHIEdge,
+                                 AllSuccsCache &AllSuccessors) {
   assert (MBB && "Invalid MachineBasicBlock!");

   // Loop over all the operands of the specified instruction. If there is
@@ -596,8 +592,8 @@
   // SuccToSinkTo - This is the successor to sink this instruction to, once we
   // decide.
   MachineBasicBlock *SuccToSinkTo = nullptr;
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg()) continue;  // Ignore non-register operands.

     unsigned Reg = MO.getReg();
@@ -673,22 +669,70 @@
   return SuccToSinkTo;
 }
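The heuristic added next looks for a specific shape: a block with a single predecessor whose terminator is a null test carrying make.implicit metadata, where the candidate instruction is a load from the tested base register. Sinking such a load would destroy the implicit-null-check opportunity. A condensed sketch of just the final predicate, with invented stand-in types (the real code queries TII and metadata, as shown below):

// All the block/metadata/load queries are reduced to booleans here; only the
// branch-shape test is spelled out.
struct SketchBranchPredicate { int TestedReg; long Imm; bool IsEqOrNe; };

bool sinkingWouldBlockNullCheck(bool SinglePred, bool HasMakeImplicit,
                                bool IsUnpredicatedLoad, int LoadBaseReg,
                                const SketchBranchPredicate &MBP) {
  if (!SinglePred || !HasMakeImplicit || !IsUnpredicatedLoad)
    return false;
  // The branch must compare the load's base register against zero.
  return MBP.IsEqOrNe && MBP.Imm == 0 && MBP.TestedReg == LoadBaseReg;
}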
+/// \brief Return true if MI is likely to be usable as a memory operation by the
+/// implicit null check optimization.
+///
+/// This is a "best effort" heuristic, and should not be relied upon for
+/// correctness. This returning true does not guarantee that the implicit null
+/// check optimization is legal over MI, and this returning false does not
+/// guarantee MI cannot possibly be used to do a null check.
+static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
+                                             const TargetInstrInfo *TII,
+                                             const TargetRegisterInfo *TRI) {
+  typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate;
+
+  auto *MBB = MI.getParent();
+  if (MBB->pred_size() != 1)
+    return false;
+
+  auto *PredMBB = *MBB->pred_begin();
+  auto *PredBB = PredMBB->getBasicBlock();
+
+  // Frontends that don't use implicit null checks have no reason to emit
+  // branches with make.implicit metadata, and this function should always
+  // return false for them.
+  if (!PredBB ||
+      !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
+    return false;
+
+  unsigned BaseReg;
+  int64_t Offset;
+  if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+    return false;
+
+  if (!(MI.mayLoad() && !MI.isPredicable()))
+    return false;
+
+  MachineBranchPredicate MBP;
+  if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
+    return false;
+
+  return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
+         (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
+          MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
+         MBP.LHS.getReg() == BaseReg;
+}
+
 /// SinkInstruction - Determine whether it is safe to sink the specified machine
 /// instruction out of its current block into a successor.
-bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore,
+bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
                                      AllSuccsCache &AllSuccessors) {
-  // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
-  // be close to the source to make it easier to coalesce.
-  if (AvoidsSinking(MI, MRI))
+  // Don't sink instructions that the target prefers not to sink.
+  if (!TII->shouldSink(MI))
     return false;

   // Check if it's safe to move the instruction.
-  if (!MI->isSafeToMove(AA, SawStore))
+  if (!MI.isSafeToMove(AA, SawStore))
     return false;

   // Convergent operations may not be made control-dependent on additional
   // values.
-  if (MI->isConvergent())
+  if (MI.isConvergent())
+    return false;
+
+  // Don't break implicit null checks. This is a performance heuristic, and not
+  // required for correctness.
+  if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
     return false;

   // FIXME: This should include support for sinking instructions within the
@@ -700,7 +744,7 @@
   // and z and only shrink the live range of x.

   bool BreakPHIEdge = false;
-  MachineBasicBlock *ParentBlock = MI->getParent();
+  MachineBasicBlock *ParentBlock = MI.getParent();
   MachineBasicBlock *SuccToSinkTo =
       FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);
@@ -712,8 +756,8 @@
   // If the instruction to move defines a dead physical register which is live
   // when leaving the basic block, don't move it because it could turn into a
   // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
-  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
-    const MachineOperand &MO = MI->getOperand(I);
+  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI.getOperand(I);
     if (!MO.isReg()) continue;
     unsigned Reg = MO.getReg();
     if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
@@ -721,7 +765,7 @@
       return false;
   }

-  DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+  DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);

   // If the block has multiple predecessors, this is a critical edge.
   // Decide if we can sink along it or need to break the edge.
@@ -730,7 +774,7 @@
   // other code paths.
   bool TryBreak = false;
   bool store = true;
-  if (!MI->isSafeToMove(AA, store)) {
+  if (!MI.isSafeToMove(AA, store)) {
     DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
     TryBreak = true;
   }
@@ -804,7 +848,7 @@
   // Note that we have to clear the kill flags for any register this instruction
   // uses as we may sink over another instruction which currently kills the
   // used registers.
-  for (MachineOperand &MO : MI->operands()) {
+  for (MachineOperand &MO : MI.operands()) {
     if (MO.isReg() && MO.isUse())
       RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
   }
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index f7edacd..86332c8 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -328,8 +328,10 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
       continue;
     // Pick the predecessor that would give this block the smallest InstrDepth.
     unsigned Depth = PredTBI->InstrDepth + CurCount;
-    if (!Best || Depth < BestDepth)
-      Best = Pred, BestDepth = Depth;
+    if (!Best || Depth < BestDepth) {
+      Best = Pred;
+      BestDepth = Depth;
+    }
   }
   return Best;
 }
@@ -356,8 +358,10 @@ MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
       continue;
     // Pick the successor that would give this block the smallest InstrHeight.
     unsigned Height = SuccTBI->InstrHeight;
-    if (!Best || Height < BestHeight)
-      Best = Succ, BestHeight = Height;
+    if (!Best || Height < BestHeight) {
+      Best = Succ;
+      BestHeight = Height;
+    }
   }
   return Best;
 }
@@ -621,16 +625,16 @@ struct DataDep {

 // Get the input data dependencies that must be ready before UseMI can issue.
 // Return true if UseMI has any physreg operands.
-static bool getDataDeps(const MachineInstr *UseMI,
+static bool getDataDeps(const MachineInstr &UseMI,
                         SmallVectorImpl<DataDep> &Deps,
                         const MachineRegisterInfo *MRI) {
   // Debug values should not be included in any calculations.
-  if (UseMI->isDebugValue())
+  if (UseMI.isDebugValue())
     return false;

   bool HasPhysRegs = false;
-  for (MachineInstr::const_mop_iterator I = UseMI->operands_begin(),
-       E = UseMI->operands_end(); I != E; ++I) {
+  for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
+       E = UseMI.operands_end(); I != E; ++I) {
     const MachineOperand &MO = *I;
     if (!MO.isReg())
       continue;
@@ -643,7 +647,7 @@ static bool getDataDeps(const MachineInstr &UseMI,
     }
     // Collect virtual register reads.
     if (MO.readsReg())
-      Deps.push_back(DataDep(MRI, Reg, UseMI->getOperandNo(I)));
+      Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
   }
   return HasPhysRegs;
 }

@@ -651,17 +655,17 @@ static bool getDataDeps(const MachineInstr &UseMI,
 // Get the input data dependencies of a PHI instruction, using Pred as the
 // preferred predecessor.
 // This will add at most one dependency to Deps.
-static void getPHIDeps(const MachineInstr *UseMI,
+static void getPHIDeps(const MachineInstr &UseMI,
                        SmallVectorImpl<DataDep> &Deps,
                        const MachineBasicBlock *Pred,
                        const MachineRegisterInfo *MRI) {
   // No predecessor at the beginning of a trace. Ignore dependencies.
   if (!Pred)
     return;
-  assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI");
-  for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) {
-    if (UseMI->getOperand(i + 1).getMBB() == Pred) {
-      unsigned Reg = UseMI->getOperand(i).getReg();
+  assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
+  for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
+    if (UseMI.getOperand(i + 1).getMBB() == Pred) {
+      unsigned Reg = UseMI.getOperand(i).getReg();
       Deps.push_back(DataDep(MRI, Reg, i));
       return;
     }
@@ -823,8 +827,8 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
       // Collect all data dependencies.
       Deps.clear();
       if (UseMI.isPHI())
-        getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI);
-      else if (getDataDeps(&UseMI, Deps, MTM.MRI))
+        getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
+      else if (getDataDeps(UseMI, Deps, MTM.MRI))
         updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);

       // Filter and process dependencies, computing the earliest issue cycle.
@@ -861,15 +865,16 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
 // Identify physreg dependencies for MI when scanning instructions upwards.
 // Return the issue height of MI after considering any live regunits.
 // Height is the issue height computed from virtual register dependencies alone.
-static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
+static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
                                       SparseSet<LiveRegUnit> &RegUnits,
                                       const TargetSchedModel &SchedModel,
                                       const TargetInstrInfo *TII,
                                       const TargetRegisterInfo *TRI) {
   SmallVector<unsigned, 8> ReadOps;

-  for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
-       MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+  for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
+                                        MOE = MI.operands_end();
+       MOI != MOE; ++MOI) {
     const MachineOperand &MO = *MOI;
     if (!MO.isReg())
       continue;
@@ -877,7 +882,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
     if (!TargetRegisterInfo::isPhysicalRegister(Reg))
       continue;
     if (MO.readsReg())
-      ReadOps.push_back(MI->getOperandNo(MOI));
+      ReadOps.push_back(MI.getOperandNo(MOI));
     if (!MO.isDef())
       continue;
     // This is a def of Reg. Remove corresponding entries from RegUnits, and
@@ -887,11 +892,11 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
       if (I == RegUnits.end())
         continue;
       unsigned DepHeight = I->Cycle;
-      if (!MI->isTransient()) {
+      if (!MI.isTransient()) {
         // We may not know the UseMI of this dependency, if it came from the
        // live-in list. SchedModel can handle a NULL UseMI.
-        DepHeight += SchedModel
-          .computeOperandLatency(MI, MI->getOperandNo(MOI), I->MI, I->Op);
+        DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
+                                                      I->MI, I->Op);
       }
       Height = std::max(Height, DepHeight);
       // This regunit is dead above MI.
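The upward scan constrains an instruction's issue height through physical-register units: when the instruction defines a register, any recorded reader above it forces the def to issue early enough to cover the def-to-use latency. A reduced sketch with a plain std::map standing in for the SparseSet of live regunits:

#include <algorithm>
#include <map>

struct RecordedReader { unsigned Height; };

// Returns the issue height of a def of DefReg, given the heights of readers
// recorded while scanning upwards through the trace.
unsigned heightFromPhysDeps(unsigned Height,
                            const std::map<unsigned, RecordedReader> &LiveUnits,
                            unsigned DefReg, unsigned Latency) {
  auto I = LiveUnits.find(DefReg);
  if (I == LiveUnits.end())
    return Height;  // no reader above: the def is unconstrained
  // The def must issue Latency cycles before its highest reader.
  return std::max(Height, I->second.Height + Latency);
}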
@@ -901,13 +906,13 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,

   // Now we know the height of MI. Update any regunits read.
   for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
-    unsigned Reg = MI->getOperand(ReadOps[i]).getReg();
+    unsigned Reg = MI.getOperand(ReadOps[i]).getReg();
     for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
       LiveRegUnit &LRU = RegUnits[*Units];
       // Set the height to the highest reader of the unit.
-      if (LRU.Cycle <= Height && LRU.MI != MI) {
+      if (LRU.Cycle <= Height && LRU.MI != &MI) {
         LRU.Cycle = Height;
-        LRU.MI = MI;
+        LRU.MI = &MI;
         LRU.Op = ReadOps[i];
       }
     }
@@ -921,15 +926,14 @@ typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;

 // Push the height of DefMI upwards if required to match UseMI.
 // Return true if this is the first time DefMI was seen.
-static bool pushDepHeight(const DataDep &Dep,
-                          const MachineInstr *UseMI, unsigned UseHeight,
-                          MIHeightMap &Heights,
+static bool pushDepHeight(const DataDep &Dep, const MachineInstr &UseMI,
+                          unsigned UseHeight, MIHeightMap &Heights,
                           const TargetSchedModel &SchedModel,
                           const TargetInstrInfo *TII) {
   // Adjust height by Dep.DefMI latency.
   if (!Dep.DefMI->isTransient())
-    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
-                                                  UseMI, Dep.UseOp);
+    UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI,
+                                                  Dep.UseOp);

   // Update Heights[DefMI] to be the maximum height seen.
   MIHeightMap::iterator I;
@@ -1048,13 +1052,13 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
       if (!PHI.isPHI())
         break;
       Deps.clear();
-      getPHIDeps(&PHI, Deps, MBB, MTM.MRI);
+      getPHIDeps(PHI, Deps, MBB, MTM.MRI);
       if (!Deps.empty()) {
         // Loop header PHI heights are all 0.
         unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
         DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
-        if (pushDepHeight(Deps.front(), &PHI, Height,
-                          Heights, MTM.SchedModel, MTM.TII))
+        if (pushDepHeight(Deps.front(), PHI, Height, Heights, MTM.SchedModel,
+                          MTM.TII))
           addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
       }
     }
@@ -1063,12 +1067,12 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
     // Go through the block backwards.
     for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
          BI != BB;) {
-      const MachineInstr *MI = --BI;
+      const MachineInstr &MI = *--BI;
       // Find the MI height as determined by virtual register uses in the
       // trace below.
       unsigned Cycle = 0;
-      MIHeightMap::iterator HeightI = Heights.find(MI);
+      MIHeightMap::iterator HeightI = Heights.find(&MI);
       if (HeightI != Heights.end()) {
         Cycle = HeightI->second;
         // We won't be seeing any more MI uses.
@@ -1078,27 +1082,27 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
       // Don't process PHI deps. They depend on the specific predecessor, and
       // we'll get them when visiting the predecessor.
       Deps.clear();
-      bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI);
+      bool HasPhysRegs = !MI.isPHI() && getDataDeps(MI, Deps, MTM.MRI);

       // There may also be regunit dependencies to include in the height.
       if (HasPhysRegs)
-        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits,
-                                      MTM.SchedModel, MTM.TII, MTM.TRI);
+        Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, MTM.SchedModel,
+                                      MTM.TII, MTM.TRI);

       // Update the required height of any virtual registers read by MI.
       for (const DataDep &Dep : Deps)
         if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII))
           addLiveIns(Dep.DefMI, Dep.DefOp, Stack);

-      InstrCycles &MICycles = Cycles[MI];
+      InstrCycles &MICycles = Cycles[&MI];
       MICycles.Height = Cycle;
       if (!TBI.HasValidInstrDepths) {
-        DEBUG(dbgs() << Cycle << '\t' << *MI);
+        DEBUG(dbgs() << Cycle << '\t' << MI);
         continue;
       }
       // Update critical path length.
       TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth);
-      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI);
+      DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << MI);
     }

     // Update virtual live-in heights. They were added by addLiveIns() with a 0
@@ -1143,26 +1147,25 @@ MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) {
 }

 unsigned
-MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const {
-  assert(MI && "Not an instruction.");
-  assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) &&
+MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr &MI) const {
+  assert(getBlockNum() == unsigned(MI.getParent()->getNumber()) &&
          "MI must be in the trace center block");
   InstrCycles Cyc = getInstrCycles(MI);
   return getCriticalPath() - (Cyc.Depth + Cyc.Height);
 }

 unsigned
-MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const {
+MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr &PHI) const {
   const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum());
   SmallVector<DataDep, 1> Deps;
   getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI);
   assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor");
   DataDep &Dep = Deps.front();
-  unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth;
+  unsigned DepCycle = getInstrCycles(*Dep.DefMI).Depth;
   // Add latency if DefMI is a real instruction. Transients get latency 0.
   if (!Dep.DefMI->isTransient())
-    DepCycle += TE.MTM.SchedModel
-      .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp);
+    DepCycle += TE.MTM.SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
+                                                        &PHI, Dep.UseOp);
   return DepCycle;
 }
@@ -1248,13 +1251,13 @@ unsigned MachineTraceMetrics::Trace::getResourceLength(
   return std::max(Instrs, PRMax);
 }

-bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI,
-                                              const MachineInstr *UseMI) const {
-  if (DefMI->getParent() == UseMI->getParent())
+bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr &DefMI,
+                                              const MachineInstr &UseMI) const {
+  if (DefMI.getParent() == UseMI.getParent())
     return true;

-  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()];
-  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()];
+  const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI.getParent()->getNumber()];
+  const TraceBlockInfo &TBI = TE.BlockInfo[UseMI.getParent()->getNumber()];

   return DepTBI.isUsefulDominator(TBI);
 }
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index 428295e..a70adb0 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -58,7 +58,7 @@ namespace {
       Banner(b) {}

-    bool runOnMachineFunction(MachineFunction &MF);
+    unsigned verify(MachineFunction &MF);

     Pass *const PASS;
     const char *Banner;
@@ -217,10 +217,22 @@ namespace {
                         LaneBitmask LaneMask) const;
     void report_context(const LiveRange::Segment &S) const;
     void report_context(const VNInfo &VNI) const;
+    void report_context(SlotIndex Pos) const;
+    void report_context_liverange(const LiveRange &LR) const;
+    void report_context_lanemask(LaneBitmask LaneMask) const;
+    void report_context_vreg(unsigned VReg) const;
+    void report_context_vreg_regunit(unsigned VRegOrRegUnit) const;

     void verifyInlineAsm(const MachineInstr *MI);

     void checkLiveness(const MachineOperand *MO, unsigned MONum);
+    void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum,
+                            SlotIndex UseIdx, const LiveRange &LR, unsigned Reg,
+                            LaneBitmask LaneMask = 0);
+    void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
+                            SlotIndex DefIdx, const LiveRange &LR, unsigned Reg,
+                            LaneBitmask LaneMask = 0);
+
     void markReachable(const MachineBasicBlock *MBB);
     void calcRegsPassed();
     void checkPHIOps(const MachineBasicBlock *MBB);
@@ -239,6 +251,7 @@ namespace {
     void verifyStackFrame();

     void verifySlotIndexes() const;
+    void verifyProperties(const MachineFunction &MF);
   };

   struct MachineVerifierPass : public MachineFunctionPass {
@@ -256,7 +269,9 @@ namespace {
     }

     bool runOnMachineFunction(MachineFunction &MF) override {
-      MF.verify(this, Banner.c_str());
+      unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF);
+      if (FoundErrors)
+        report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
       return false;
     }
   };
@@ -271,9 +286,13 @@ FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) {
   return new MachineVerifierPass(Banner);
 }

-void MachineFunction::verify(Pass *p, const char *Banner) const {
-  MachineVerifier(p, Banner)
-    .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+bool MachineFunction::verify(Pass *p, const char *Banner, bool AbortOnErrors)
+    const {
+  MachineFunction &MF = const_cast<MachineFunction&>(*this);
+  unsigned FoundErrors = MachineVerifier(p, Banner).verify(MF);
+  if (AbortOnErrors && FoundErrors)
+    report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
errors."); + return FoundErrors == 0; } void MachineVerifier::verifySlotIndexes() const { @@ -289,7 +308,20 @@ void MachineVerifier::verifySlotIndexes() const { } } -bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { +void MachineVerifier::verifyProperties(const MachineFunction &MF) { + // If a pass has introduced virtual registers without clearing the + // AllVRegsAllocated property (or set it without allocating the vregs) + // then report an error. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::AllVRegsAllocated) && + MRI->getNumVirtRegs()) { + report( + "Function has AllVRegsAllocated property but there are VReg operands", + &MF); + } +} + +unsigned MachineVerifier::verify(MachineFunction &MF) { foundErrors = 0; this->MF = &MF; @@ -313,6 +345,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { verifySlotIndexes(); + verifyProperties(MF); + visitMachineFunctionBefore(); for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); MFI!=MFE; ++MFI) { @@ -374,9 +408,6 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { } visitMachineFunctionAfter(); - if (foundErrors) - report_fatal_error("Found "+Twine(foundErrors)+" machine code errors."); - // Clean up. regsLive.clear(); regsDefined.clear(); @@ -386,7 +417,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { regsLiveInButUnused.clear(); MBBInfoMap.clear(); - return false; // no changes + return foundErrors; } void MachineVerifier::report(const char *msg, const MachineFunction *MF) { @@ -420,8 +451,8 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); errs() << "- instruction: "; - if (Indexes && Indexes->hasIndex(MI)) - errs() << Indexes->getInstructionIndex(MI) << '\t'; + if (Indexes && Indexes->hasIndex(*MI)) + errs() << Indexes->getInstructionIndex(*MI) << '\t'; MI->print(errs(), /*SkipOpers=*/true); errs() << '\n'; } @@ -435,16 +466,20 @@ void MachineVerifier::report(const char *msg, errs() << "\n"; } +void MachineVerifier::report_context(SlotIndex Pos) const { + errs() << "- at: " << Pos << '\n'; +} + void MachineVerifier::report_context(const LiveInterval &LI) const { errs() << "- interval: " << LI << '\n'; } void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) const { + report_context_liverange(LR); errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; if (LaneMask != 0) - errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n'; - errs() << "- liverange: " << LR << '\n'; + report_context_lanemask(LaneMask); } void MachineVerifier::report_context(const LiveRange::Segment &S) const { @@ -455,6 +490,26 @@ void MachineVerifier::report_context(const VNInfo &VNI) const { errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n"; } +void MachineVerifier::report_context_liverange(const LiveRange &LR) const { + errs() << "- liverange: " << LR << '\n'; +} + +void MachineVerifier::report_context_vreg(unsigned VReg) const { + errs() << "- v. 
register: " << PrintReg(VReg, TRI) << '\n'; +} + +void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const { + if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) { + report_context_vreg(VRegOrUnit); + } else { + errs() << "- regunit: " << PrintRegUnit(VRegOrUnit, TRI) << '\n'; + } +} + +void MachineVerifier::report_context_lanemask(LaneBitmask LaneMask) const { + errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n'; +} + void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { @@ -521,7 +576,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { // it is an entry block or landing pad. for (const auto &LI : MBB->liveins()) { if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() && - MBB != MBB->getParent()->begin()) { + MBB->getIterator() != MBB->getParent()->begin()) { report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB); } } @@ -567,8 +622,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { // Call AnalyzeBranch. If it succeeds, there several more conditions to check. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; - if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB), - TBB, FBB, Cond)) { + if (!TII->analyzeBranch(*const_cast<MachineBasicBlock *>(MBB), TBB, FBB, + Cond)) { // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's // check whether its answers match up with reality. if (!TBB && !FBB) { @@ -591,7 +646,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { "differs from its CFG successor!", MBB); } if (!MBB->empty() && MBB->back().isBarrier() && - !TII->isPredicated(&MBB->back())) { + !TII->isPredicated(MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } @@ -721,8 +776,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { // This function gets called for all bundle headers, including normal // stand-alone unbundled instructions. void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { - if (Indexes && Indexes->hasIndex(MI)) { - SlotIndex idx = Indexes->getInstructionIndex(MI); + if (Indexes && Indexes->hasIndex(*MI)) { + SlotIndex idx = Indexes->getInstructionIndex(*MI); if (!(idx > lastIndex)) { report("Instruction index out of order", MI); errs() << "Last instruction was at " << lastIndex << '\n'; @@ -733,7 +788,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) { // Ensure non-terminators don't follow terminators. // Ignore predicated terminators formed by if conversion. // FIXME: If conversion shouldn't need to violate this rule. - if (MI->isTerminator() && !TII->isPredicated(MI)) { + if (MI->isTerminator() && !TII->isPredicated(*MI)) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { @@ -755,8 +810,9 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) { if (!MI->getOperand(1).isImm()) report("Asm flags must be an immediate", MI); // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2, - // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16. - if (!isUInt<5>(MI->getOperand(1).getImm())) + // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16, + // and Extra_IsConvergent = 32. 
   // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2,
-  // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16.
-  if (!isUInt<5>(MI->getOperand(1).getImm()))
+  // Extra_AsmDialect = 4, Extra_MayLoad = 8, Extra_MayStore = 16,
+  // and Extra_IsConvergent = 32.
+  if (!isUInt<6>(MI->getOperand(1).getImm()))
     report("Unknown asm flags", &MI->getOperand(1), 1);

   static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed");
@@ -810,7 +866,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
   // Debug values must not have a slot index.
   // Other instructions must have one, unless they are inside a bundle.
   if (LiveInts) {
-    bool mapped = !LiveInts->isNotInMIMap(MI);
+    bool mapped = !LiveInts->isNotInMIMap(*MI);
     if (MI->isDebugValue()) {
       if (mapped)
         report("Debug instruction has a slot index", MI);
@@ -824,7 +880,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
   }

   StringRef ErrorInfo;
-  if (!TII->verifyInstruction(MI, ErrorInfo))
+  if (!TII->verifyInstruction(*MI, ErrorInfo))
     report(ErrorInfo.data(), MI);
 }
@@ -929,7 +985,30 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
       }
     } else {
       // Virtual register.
-      const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+      const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
+      if (!RC) {
+        // This is a generic virtual register.
+        // It must have a size and it must not have a SubIdx.
+        unsigned Size = MRI->getSize(Reg);
+        if (!Size) {
+          report("Generic virtual register must have a size", MO, MONum);
+          return;
+        }
+        // Make sure the register fits into its register bank if any.
+        const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
+        if (RegBank && RegBank->getSize() < Size) {
+          report("Register bank is too small for virtual register", MO,
+                 MONum);
+          errs() << "Register bank " << RegBank->getName() << " too small("
+                 << RegBank->getSize() << ") to fit " << Size << "-bits\n";
+          return;
+        }
+        if (SubIdx) {
+          report("Generic virtual register must not have a subregister index",
+                 MO, MONum);
+          return;
+        }
+        break;
+      }
       if (SubIdx) {
         const TargetRegisterClass *SRC =
           TRI->getSubClassWithSubReg(RC, SubIdx);
@@ -984,10 +1063,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {

   case MachineOperand::MO_FrameIndex:
     if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
-        LiveInts && !LiveInts->isNotInMIMap(MI)) {
+        LiveInts && !LiveInts->isNotInMIMap(*MI)) {
       int FI = MO->getIndex();
       LiveInterval &LI = LiveStks->getInterval(FI);
-      SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+      SlotIndex Idx = LiveInts->getInstructionIndex(*MI);

       bool stores = MI->mayStore();
       bool loads = MI->mayLoad();
@@ -1028,6 +1107,83 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
   }
 }

+void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
+    unsigned MONum, SlotIndex UseIdx, const LiveRange &LR, unsigned VRegOrUnit,
+    LaneBitmask LaneMask) {
+  LiveQueryResult LRQ = LR.Query(UseIdx);
+  // Check if we have a segment at the use; note, however, that we only need
+  // one live subregister range, the others may be dead.
+ if (!LRQ.valueIn() && LaneMask == 0) { + report("No live segment at use", MO, MONum); + report_context_liverange(LR); + report_context_vreg_regunit(VRegOrUnit); + report_context(UseIdx); + } + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + report_context_liverange(LR); + report_context_vreg_regunit(VRegOrUnit); + if (LaneMask != 0) + report_context_lanemask(LaneMask); + report_context(UseIdx); + } +} + +void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO, + unsigned MONum, SlotIndex DefIdx, const LiveRange &LR, unsigned VRegOrUnit, + LaneBitmask LaneMask) { + if (const VNInfo *VNI = LR.getVNInfoAt(DefIdx)) { + assert(VNI && "NULL valno is not allowed"); + if (VNI->def != DefIdx) { + report("Inconsistent valno->def", MO, MONum); + report_context_liverange(LR); + report_context_vreg_regunit(VRegOrUnit); + if (LaneMask != 0) + report_context_lanemask(LaneMask); + report_context(*VNI); + report_context(DefIdx); + } + } else { + report("No live segment at def", MO, MONum); + report_context_liverange(LR); + report_context_vreg_regunit(VRegOrUnit); + if (LaneMask != 0) + report_context_lanemask(LaneMask); + report_context(DefIdx); + } + // Check that, if the dead def flag is present, LiveInts agree. + if (MO->isDead()) { + LiveQueryResult LRQ = LR.Query(DefIdx); + if (!LRQ.isDeadDef()) { + // In case of physregs we can have a non-dead definition on another + // operand. + bool otherDef = false; + if (!TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) { + const MachineInstr &MI = *MO->getParent(); + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || MO.isDead()) + continue; + unsigned Reg = MO.getReg(); + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (*Units == VRegOrUnit) { + otherDef = true; + break; + } + } + } + } + + if (!otherDef) { + report("Live range continues after dead def flag", MO, MONum); + report_context_liverange(LR); + report_context_vreg_regunit(VRegOrUnit); + if (LaneMask != 0) + report_context_lanemask(LaneMask); + } + } + } +} + void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const unsigned Reg = MO->getReg(); @@ -1048,23 +1204,13 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } // Check LiveInts liveness and kill. - if (LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI); + if (LiveInts && !LiveInts->isNotInMIMap(*MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI); // Check the cached regunit intervals. if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { - if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) { - LiveQueryResult LRQ = LR->Query(UseIdx); - if (!LRQ.valueIn()) { - report("No live segment at use", MO, MONum); - errs() << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) - << ' ' << *LR << '\n'; - } - if (MO->isKill() && !LRQ.isKill()) { - report("Live range continues after kill flag", MO, MONum); - errs() << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; - } - } + if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) + checkLivenessAtUse(MO, MONum, UseIdx, *LR, *Units); } } @@ -1072,16 +1218,28 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveInts->hasInterval(Reg)) { // This is a virtual register interval. 
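The helpers above hinge on LiveRange::Query. A toy model of the two predicates they test, with plain integers standing in for SlotIndex; this is illustrative only and deliberately much simpler than the real LiveRange/LiveQueryResult classes:

  #include <cassert>
  #include <vector>

  // A segment [Start, End) means the value is live over that index range.
  struct Segment { unsigned Start, End; };
  struct ToyLiveRange {
    std::vector<Segment> Segments;
    bool valueIn(unsigned Idx) const {   // is a value live-in at Idx?
      for (const Segment &S : Segments)
        if (S.Start <= Idx && Idx < S.End)
          return true;
      return false;
    }
    bool isKill(unsigned Idx) const {    // does some segment end at Idx?
      for (const Segment &S : Segments)
        if (S.End == Idx)
          return true;
      return false;
    }
  };

  int main() {
    ToyLiveRange LR{{{4, 10}}};
    assert(LR.valueIn(6));  // a use at 6 is fine: no "No live segment at use"
    assert(LR.isKill(10));  // a kill flag where the range ends is consistent
    assert(!LR.isKill(6));  // a kill flag at 6 would trigger
                            // "Live range continues after kill flag"
  }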
const LiveInterval &LI = LiveInts->getInterval(Reg);
- LiveQueryResult LRQ = LI.Query(UseIdx);
- if (!LRQ.valueIn()) {
- report("No live segment at use", MO, MONum);
- errs() << UseIdx << " is not live in " << LI << '\n';
- }
- // Check for extra kill flags.
- // Note that we allow missing kill flags for now.
- if (MO->isKill() && !LRQ.isKill()) {
- report("Live range continues after kill flag", MO, MONum);
- errs() << "Live range: " << LI << '\n';
+ checkLivenessAtUse(MO, MONum, UseIdx, LI, Reg);
+
+ if (LI.hasSubRanges() && !MO->isDef()) {
+ unsigned SubRegIdx = MO->getSubReg();
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask LiveInMask = 0;
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((MOMask & SR.LaneMask) == 0)
+ continue;
+ checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
+ LiveQueryResult LRQ = SR.Query(UseIdx);
+ if (LRQ.valueIn())
+ LiveInMask |= SR.LaneMask;
+ }
+ // At least part of the register has to be live at the use.
+ if ((LiveInMask & MOMask) == 0) {
+ report("No live subrange at use", MO, MONum);
+ report_context(LI);
+ report_context(UseIdx);
+ }
}
} else {
report("Virtual register has no live interval", MO, MONum);
@@ -1154,33 +1312,29 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
report("Multiple virtual register defs in SSA form", MO, MONum);
// Check LiveInts for a live segment, but only for virtual registers.
- if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
- !LiveInts->isNotInMIMap(MI)) {
- SlotIndex DefIdx = LiveInts->getInstructionIndex(MI);
+ if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);
DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
- assert(VNI && "NULL valno is not allowed");
- if (VNI->def != DefIdx) {
- report("Inconsistent valno->def", MO, MONum);
- errs() << "Valno " << VNI->id << " is not defined at "
- << DefIdx << " in " << LI << '\n';
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg);
+
+ if (LI.hasSubRanges()) {
+ unsigned SubRegIdx = MO->getSubReg();
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((SR.LaneMask & MOMask) == 0)
+ continue;
+ checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask);
+ }
}
} else {
- report("No live segment at def", MO, MONum);
- errs() << DefIdx << " is not live in " << LI << '\n';
- }
- // Check that, if the dead def flag is present, LiveInts agree.
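The subrange walk added above reports a use only when no live-in lane overlaps the lanes the operand reads, so a single live subregister range is enough. A toy version of that mask arithmetic, with plain unsigned masks standing in for LaneBitmask and made-up lane values:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  int main() {
    uint32_t MOMask = 0x3;                               // lanes the operand reads
    std::vector<uint32_t> LiveInSubranges = {0x1, 0xC};  // live-in subrange masks
    uint32_t LiveInMask = 0;
    for (uint32_t SRMask : LiveInSubranges)
      if (MOMask & SRMask)       // only subranges that overlap the read lanes
        LiveInMask |= SRMask;
    // Only if *no* read lane were live would the verifier report
    // "No live subrange at use"; here lane 0 is live, so the use is fine.
    assert((LiveInMask & MOMask) != 0);
  }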
- if (MO->isDead()) {
- LiveQueryResult LRQ = LI.Query(DefIdx);
- if (!LRQ.isDeadDef()) {
- report("Live range continues after dead def flag", MO, MONum);
- errs() << "Live range: " << LI << '\n';
- }
+ report("Virtual register has no live interval", MO, MONum);
}
- } else {
- report("Virtual register has no Live interval", MO, MONum);
}
}
}
@@ -1360,9 +1514,10 @@ void MachineVerifier::visitMachineFunctionAfter() {
BBInfo &MInfo = MBBInfoMap[&MF->front()];
for (RegSet::iterator
I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
- ++I)
- report("Virtual register def doesn't dominate all uses.",
- MRI->getVRegDef(*I));
+ ++I) {
+ report("Virtual register defs don't dominate all uses.", MF);
+ report_context_vreg(*I);
+ }
}
if (LiveVars)
@@ -1474,7 +1629,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
if (Reg != 0) {
bool hasDef = false;
bool isEarlyClobber = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
if (!MOI->isReg() || !MOI->isDef())
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -1613,18 +1768,33 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// use, or a dead flag on a def.
bool hasRead = false;
bool hasSubRegDef = false;
- for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
if (!MOI->isReg() || MOI->getReg() != Reg)
continue;
if (LaneMask != 0 &&
(LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0)
continue;
- if (MOI->isDef() && MOI->getSubReg() != 0)
- hasSubRegDef = true;
+ if (MOI->isDef()) {
+ if (MOI->getSubReg() != 0)
+ hasSubRegDef = true;
+ if (MOI->isDead())
+ hasDeadDef = true;
+ }
if (MOI->readsReg())
hasRead = true;
}
- if (!S.end.isDead()) {
+ if (S.end.isDead()) {
+ // Make sure that the corresponding machine operand for a "dead" live
+ // range has the dead flag. We cannot perform this check for subregister
+ // live ranges as partially dead values are allowed.
+ if (LaneMask == 0 && !hasDeadDef) {
+ report("Instruction ending live segment on dead slot has no dead flag",
+ MI);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ } else {
if (!hasRead) {
// When tracking subregister liveness, the main range must start new
// values on partial register writes, even if there is no read.
@@ -1670,8 +1840,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
const VNInfo *PVNI = LR.getVNInfoBefore(PEnd);
- // All predecessors must have a live-out value.
- if (!PVNI) {
+ // All predecessors must have a live-out value if this is not a
+ // subregister live range.
+ if (!PVNI && LaneMask == 0) {
report("Register not marked live out of predecessor", *PI);
report_context(LR, Reg, LaneMask);
report_context(*VNI);
diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp
deleted file mode 100644
index 17654a6..0000000
--- a/contrib/llvm/lib/CodeGen/OcamlGC.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// -//===----------------------------------------------------------------------===// -// -// This file implements lowering for the llvm.gc* intrinsics compatible with -// Objective Caml 3.10.0, which uses a liveness-accurate static stack map. -// -// The frametable emitter is in OcamlGCPrinter.cpp. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/GCs.h" -#include "llvm/CodeGen/GCStrategy.h" - -using namespace llvm; - -namespace { -class OcamlGC : public GCStrategy { -public: - OcamlGC(); -}; -} - -static GCRegistry::Add<OcamlGC> X("ocaml", "ocaml 3.10-compatible GC"); - -void llvm::linkOcamlGC() {} - -OcamlGC::OcamlGC() { - NeededSafePoints = 1 << GC::PostCall; - UsesMetadata = true; -} diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp index a1042e7..0177e41 100644 --- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -63,7 +63,7 @@ INITIALIZE_PASS(OptimizePHIs, "opt-phis", "Optimize machine instruction PHIs", false, false) bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { - if (skipOptnoneFunction(*Fn.getFunction())) + if (skipFunction(*Fn.getFunction())) return false; MRI = &Fn.getRegInfo(); diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index 2c93792..b8d5431 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "PHIEliminationUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -25,9 +24,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -121,6 +120,7 @@ INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", "Eliminate PHI nodes for register allocation", false, false) void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addUsedIfAvailable<LiveVariables>(); AU.addPreserved<LiveVariables>(); AU.addPreserved<SlotIndexes>(); AU.addPreserved<LiveIntervals>(); @@ -159,17 +159,16 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { unsigned DefReg = DefMI->getOperand(0).getReg(); if (MRI->use_nodbg_empty(DefReg)) { if (LIS) - LIS->RemoveMachineInstrFromMaps(DefMI); + LIS->RemoveMachineInstrFromMaps(*DefMI); DefMI->eraseFromParent(); } } // Clean up the lowered PHI instructions. - for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); - I != E; ++I) { + for (auto &I : LoweredPHIs) { if (LIS) - LIS->RemoveMachineInstrFromMaps(I->first); - MF.DeleteMachineInstr(I->first); + LIS->RemoveMachineInstrFromMaps(*I.first); + MF.DeleteMachineInstr(I.first); } LoweredPHIs.clear(); @@ -228,7 +227,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt); // Unlink the PHI node from the basic block, but don't delete the PHI yet. 
- MachineInstr *MPhi = MBB.remove(MBB.begin()); + MachineInstr *MPhi = MBB.remove(&*MBB.begin()); unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2; unsigned DestReg = MPhi->getOperand(0).getReg(); @@ -270,7 +269,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Update live variable information if there is any. if (LV) { - MachineInstr *PHICopy = std::prev(AfterPHIsIt); + MachineInstr &PHICopy = *std::prev(AfterPHIsIt); if (IncomingReg) { LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); @@ -284,7 +283,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (reusedIncoming) if (MachineInstr *OldKill = VI.findKill(&MBB)) { DEBUG(dbgs() << "Remove old kill from " << *OldKill); - LV->removeVirtualRegisterKilled(IncomingReg, OldKill); + LV->removeVirtualRegisterKilled(IncomingReg, *OldKill); DEBUG(MBB.dump()); } @@ -298,19 +297,19 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Since we are going to be deleting the PHI node, if it is the last use of // any registers, or if the value itself is dead, we need to move this // information over to the new copy we just inserted. - LV->removeVirtualRegistersKilled(MPhi); + LV->removeVirtualRegistersKilled(*MPhi); // If the result is dead, update LV. if (isDead) { LV->addVirtualRegisterDead(DestReg, PHICopy); - LV->removeVirtualRegisterDead(DestReg, MPhi); + LV->removeVirtualRegisterDead(DestReg, *MPhi); } } // Update LiveIntervals for the new copy or implicit def. if (LIS) { - MachineInstr *NewInstr = std::prev(AfterPHIsIt); - SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr); + SlotIndex DestCopyIndex = + LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt)); SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); if (IncomingReg) { @@ -453,7 +452,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); // Finally, mark it killed. - LV->addVirtualRegisterKilled(SrcReg, KillInst); + LV->addVirtualRegisterKilled(SrcReg, *KillInst); // This vreg no longer lives all of the way through opBlock. unsigned opBlockNum = opBlock.getNumber(); @@ -462,8 +461,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (LIS) { if (NewSrcInstr) { - LIS->InsertMachineInstrInMaps(NewSrcInstr); - LIS->addSegmentToEndOfBlock(IncomingReg, NewSrcInstr); + LIS->InsertMachineInstrInMaps(*NewSrcInstr); + LIS->addSegmentToEndOfBlock(IncomingReg, *NewSrcInstr); } if (!SrcUndef && @@ -513,7 +512,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); - SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst); + SlotIndex LastUseIndex = LIS->getInstructionIndex(*KillInst); SrcLI.removeSegment(LastUseIndex.getRegSlot(), LIS->getMBBEndIdx(&opBlock)); } @@ -524,7 +523,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Really delete the PHI instruction now, if it is not in the LoweredPHIs map. 
if (reusedIncoming || !IncomingReg) { if (LIS) - LIS->RemoveMachineInstrFromMaps(MPhi); + LIS->RemoveMachineInstrFromMaps(*MPhi); MF.DeleteMachineInstr(MPhi); } } @@ -612,7 +611,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, } if (!ShouldSplit && !SplitAllCriticalEdges) continue; - if (!PreMBB->SplitCriticalEdge(&MBB, this)) { + if (!PreMBB->SplitCriticalEdge(&MBB, *this)) { DEBUG(dbgs() << "Failed to split critical edge.\n"); continue; } diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp index e73ba02..ccdaec1 100644 --- a/contrib/llvm/lib/CodeGen/ParallelCG.cpp +++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp @@ -19,78 +19,81 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/thread.h" +#include "llvm/Support/ThreadPool.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/SplitModule.h" using namespace llvm; static void codegen(Module *M, llvm::raw_pwrite_stream &OS, - const Target *TheTarget, StringRef CPU, StringRef Features, - const TargetOptions &Options, Reloc::Model RM, - CodeModel::Model CM, CodeGenOpt::Level OL, + function_ref<std::unique_ptr<TargetMachine>()> TMFactory, TargetMachine::CodeGenFileType FileType) { - std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( - M->getTargetTriple(), CPU, Features, Options, RM, CM, OL)); - + std::unique_ptr<TargetMachine> TM = TMFactory(); legacy::PassManager CodeGenPasses; if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType)) report_fatal_error("Failed to setup codegen"); CodeGenPasses.run(*M); } -std::unique_ptr<Module> -llvm::splitCodeGen(std::unique_ptr<Module> M, - ArrayRef<llvm::raw_pwrite_stream *> OSs, StringRef CPU, - StringRef Features, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, - TargetMachine::CodeGenFileType FileType) { - StringRef TripleStr = M->getTargetTriple(); - std::string ErrMsg; - const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg); - if (!TheTarget) - report_fatal_error(Twine("Target not found: ") + ErrMsg); +std::unique_ptr<Module> llvm::splitCodeGen( + std::unique_ptr<Module> M, ArrayRef<llvm::raw_pwrite_stream *> OSs, + ArrayRef<llvm::raw_pwrite_stream *> BCOSs, + const std::function<std::unique_ptr<TargetMachine>()> &TMFactory, + TargetMachine::CodeGenFileType FileType, bool PreserveLocals) { + assert(BCOSs.empty() || BCOSs.size() == OSs.size()); if (OSs.size() == 1) { - codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM, - OL, FileType); + if (!BCOSs.empty()) + WriteBitcodeToFile(M.get(), *BCOSs[0]); + codegen(M.get(), *OSs[0], TMFactory, FileType); return M; } - std::vector<thread> Threads; - SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr<Module> MPart) { - // We want to clone the module in a new context to multi-thread the codegen. - // We do it by serializing partition modules to bitcode (while still on the - // main thread, in order to avoid data races) and spinning up new threads - // which deserialize the partitions into separate contexts. - // FIXME: Provide a more direct way to do this in LLVM. - SmallVector<char, 0> BC; - raw_svector_ostream BCOS(BC); - WriteBitcodeToFile(MPart.get(), BCOS); + // Create ThreadPool in nested scope so that threads will be joined + // on destruction. 
+ {
+ ThreadPool CodegenThreadPool(OSs.size());
+ int ThreadCount = 0;
- llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()];
- Threads.emplace_back(
- [TheTarget, CPU, Features, Options, RM, CM, OL, FileType,
- ThreadOS](const SmallVector<char, 0> &BC) {
- LLVMContext Ctx;
- ErrorOr<std::unique_ptr<Module>> MOrErr =
- parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()),
- "<split-module>"),
- Ctx);
- if (!MOrErr)
- report_fatal_error("Failed to read bitcode");
- std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
+ SplitModule(
+ std::move(M), OSs.size(),
+ [&](std::unique_ptr<Module> MPart) {
+ // We want to clone the module in a new context to multi-thread the
+ // codegen. We do it by serializing partition modules to bitcode
+ // (while still on the main thread, in order to avoid data races) and
+ // spinning up new threads which deserialize the partitions into
+ // separate contexts.
+ // FIXME: Provide a more direct way to do this in LLVM.
+ SmallString<0> BC;
+ raw_svector_ostream BCOS(BC);
+ WriteBitcodeToFile(MPart.get(), BCOS);
- codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features,
- Options, RM, CM, OL, FileType);
- },
- // Pass BC using std::move to ensure that it get moved rather than
- // copied into the thread's context.
- std::move(BC));
- });
+ if (!BCOSs.empty()) {
+ BCOSs[ThreadCount]->write(BC.begin(), BC.size());
+ BCOSs[ThreadCount]->flush();
+ }
+
+ llvm::raw_pwrite_stream *ThreadOS = OSs[ThreadCount++];
+ // Enqueue the task
+ CodegenThreadPool.async(
+ [TMFactory, FileType, ThreadOS](const SmallString<0> &BC) {
+ LLVMContext Ctx;
+ ErrorOr<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
+ MemoryBufferRef(StringRef(BC.data(), BC.size()),
+ "<split-module>"),
+ Ctx);
+ if (!MOrErr)
+ report_fatal_error("Failed to read bitcode");
+ std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
- for (thread &T : Threads)
- T.join();
+ codegen(MPartInCtx.get(), *ThreadOS, TMFactory, FileType);
+ },
+ // Pass BC using std::move to ensure that it gets moved rather than
+ // copied into the thread's context.
+ std::move(BC));
+ },
+ PreserveLocals);
+ }
return {};
}
diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
new file mode 100644
index 0000000..32468c9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -0,0 +1,88 @@
+//===-- PatchableFunction.cpp - Patchable prologues for LLVM -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that edits function bodies in place to support
+// the "patchable-function" attribute.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { +struct PatchableFunction : public MachineFunctionPass { + static char ID; // Pass identification, replacement for typeid + PatchableFunction() : MachineFunctionPass(ID) { + initializePatchableFunctionPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } +}; +} + +/// Returns true if instruction \p MI will not result in actual machine code +/// instructions. +static bool doesNotGeneratecode(const MachineInstr &MI) { + // TODO: Introduce an MCInstrDesc flag for this + switch (MI.getOpcode()) { + default: return false; + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::CFI_INSTRUCTION: + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + case TargetOpcode::DBG_VALUE: + return true; + } +} + +bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { + if (!MF.getFunction()->hasFnAttribute("patchable-function")) + return false; + +#ifndef NDEBUG + Attribute PatchAttr = MF.getFunction()->getFnAttribute("patchable-function"); + StringRef PatchType = PatchAttr.getValueAsString(); + assert(PatchType == "prologue-short-redirect" && "Only possibility today!"); +#endif + + auto &FirstMBB = *MF.begin(); + MachineBasicBlock::iterator FirstActualI = FirstMBB.begin(); + for (; doesNotGeneratecode(*FirstActualI); ++FirstActualI) + assert(FirstActualI != FirstMBB.end()); + + auto *TII = MF.getSubtarget().getInstrInfo(); + auto MIB = BuildMI(FirstMBB, FirstActualI, FirstActualI->getDebugLoc(), + TII->get(TargetOpcode::PATCHABLE_OP)) + .addImm(2) + .addImm(FirstActualI->getOpcode()); + + for (auto &MO : FirstActualI->operands()) + MIB.addOperand(MO); + + FirstActualI->eraseFromParent(); + MF.ensureAlignment(4); + return true; +} + +char PatchableFunction::ID = 0; +char &llvm::PatchableFunctionID = PatchableFunction::ID; +INITIALIZE_PASS(PatchableFunction, "patchable-function", + "Implement the 'patchable-function' attribute", false, false) diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 52b42b6..60b27dd 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -394,10 +394,10 @@ namespace { char PeepholeOptimizer::ID = 0; char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; -INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", +INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", +INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE, "Peephole Optimizations", false, false) /// If instruction is a copy-like instruction, i.e. it reads a single register @@ -564,13 +564,13 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, // physical register, we can try to optimize it. 
unsigned SrcReg, SrcReg2;
int CmpMask, CmpValue;
- if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
+ if (!TII->analyzeCompare(*MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
(SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
return false;

// Attempt to optimize the comparison instruction.
- if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
+ if (TII->optimizeCompareInstr(*MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
++NumCmps;
return true;
}
@@ -585,11 +585,11 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
unsigned FalseOp = 0;
bool Optimizable = false;
SmallVector<MachineOperand, 4> Cond;
- if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+ if (TII->analyzeSelect(*MI, Cond, TrueOp, FalseOp, Optimizable))
return false;
if (!Optimizable)
return false;
- if (!TII->optimizeSelect(MI, LocalMIs))
+ if (!TII->optimizeSelect(*MI, LocalMIs))
return false;
MI->eraseFromParent();
++NumSelects;
@@ -599,7 +599,7 @@
/// \brief Check if a simpler conditional branch can be generated.
bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
- return TII->optimizeCondBranch(MI);
+ return TII->optimizeCondBranch(*MI);
}

/// \brief Try to find the next source that shares the same register file
@@ -1351,7 +1351,7 @@ bool PeepholeOptimizer::foldImmediate(
continue;
DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
- if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+ if (TII->FoldImmediate(*MI, *II->second, Reg, MRI)) {
++NumImmFold;
return true;
}
@@ -1471,7 +1471,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
}
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
- if (skipOptnoneFunction(*MF.getFunction()))
+ if (skipFunction(*MF.getFunction()))
return false;
DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
@@ -1636,10 +1636,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// we need it for markUsesInDebugValueAsUndef().
unsigned FoldedReg = FoldAsLoadDefReg;
MachineInstr *DefMI = nullptr;
- MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
- FoldAsLoadDefReg,
- DefMI);
- if (FoldMI) {
+ if (MachineInstr *FoldMI =
+ TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
// Update LocalMIs since we replaced MI with FoldMI and deleted
// DefMI.
DEBUG(dbgs() << "Replacing: " << *MI);
@@ -1888,9 +1886,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
ValueTrackerResult ValueTracker::getNextSourceImpl() {
assert(Def && "This method needs a valid definition");
- assert(
- (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
- Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
+ assert(((Def->getOperand(DefIdx).isDef() &&
+ (DefIdx < Def->getDesc().getNumDefs() ||
+ Def->getDesc().isVariadic())) ||
+ Def->getOperand(DefIdx).isImplicit()) &&
+ "Invalid DefIdx");
if (Def->isCopy())
return getNextSourceFromCopy();
if (Def->isBitcast())
diff --git a/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
new file mode 100644
index 0000000..5bc5f75
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -0,0 +1,98 @@
+//===----- PostRAHazardRecognizer.cpp - hazard recognizer -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This runs the hazard recognizer and emits noops when necessary. This
+/// gives targets a way to run the hazard recognizer without running one of
+/// the schedulers. Example use cases for this pass would be:
+///
+/// - Targets that need the hazard recognizer to be run at -O0.
+/// - Targets that want to guarantee that hazards at the beginning of
+/// scheduling regions are handled correctly. The post-RA scheduler is
+/// a top-down scheduler, but when there are multiple scheduling regions
+/// in a basic block, it visits the regions in bottom-up order. This
+/// makes it impossible for the scheduler to guarantee it can correctly
+/// handle hazards at the beginning of scheduling regions.
+///
+/// This pass traverses all the instructions in a program in top-down order.
+/// In contrast to the instruction scheduling passes, this pass never resets
+/// the hazard recognizer to ensure it can correctly handle noop hazards at
+/// the beginning of blocks.
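To make the cross-block point above concrete, here is an entirely hypothetical recognizer with the same PreEmitNoops/EmitInstruction/EmitNoop shape; real recognizers are target-defined and work on MachineInstrs, not strings. The toy rule is that any "use" immediately after a "load" needs one noop:

  #include <cassert>
  #include <string>

  struct ToyHazardRec {
    bool LastWasLoad = false;   // state deliberately carried across blocks
    unsigned PreEmitNoops(const std::string &Op) const {
      return (LastWasLoad && Op == "use") ? 1 : 0;
    }
    void EmitInstruction(const std::string &Op) { LastWasLoad = (Op == "load"); }
    void EmitNoop() { LastWasLoad = false; }
  };

  int main() {
    ToyHazardRec HR;
    HR.EmitInstruction("load");        // last instruction of one block
    // Because there is no reset at the block boundary, a "use" at the top
    // of the next block still sees the hazard and gets a noop in front:
    assert(HR.PreEmitNoops("use") == 1);
    HR.EmitNoop();
    assert(HR.PreEmitNoops("use") == 0);
  }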
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "post-RA-hazard-rec" + +STATISTIC(NumNoops, "Number of noops inserted"); + +namespace { + class PostRAHazardRecognizer : public MachineFunctionPass { + + public: + static char ID; + PostRAHazardRecognizer() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + + }; + char PostRAHazardRecognizer::ID = 0; + +} + +char &llvm::PostRAHazardRecognizerID = PostRAHazardRecognizer::ID; + +INITIALIZE_PASS(PostRAHazardRecognizer, DEBUG_TYPE, + "Post RA hazard recognizer", false, false) + +bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) { + const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); + std::unique_ptr<ScheduleHazardRecognizer> HazardRec( + TII->CreateTargetPostRAHazardRecognizer(Fn)); + + // Return if the target has not implemented a hazard recognizer. + if (!HazardRec.get()) + return false; + + // Loop over all of the basic blocks + for (auto &MBB : Fn) { + // We do not call HazardRec->reset() here to make sure we are handling noop + // hazards at the start of basic blocks. + for (MachineInstr &MI : MBB) { + // If we need to emit noops prior to this instruction, then do so. 
+ unsigned NumPreNoops = HazardRec->PreEmitNoops(&MI); + for (unsigned i = 0; i != NumPreNoops; ++i) { + HazardRec->EmitNoop(); + TII->insertNoop(MBB, MachineBasicBlock::iterator(MI)); + ++NumNoops; + } + + HazardRec->EmitInstruction(&MI); + if (HazardRec->atIssueLimit()) { + HazardRec->AdvanceCycle(); + } + } + } + return true; +} diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index b95dffd..3fce307 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -18,11 +18,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "AggressiveAntiDepBreaker.h" #include "AntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" @@ -31,10 +29,12 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -96,8 +96,14 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + bool runOnMachineFunction(MachineFunction &Fn) override; + private: bool enablePostRAScheduler( const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode &Mode, @@ -128,6 +134,9 @@ namespace { /// The schedule. Null SUnit*'s represent noop instructions. std::vector<SUnit*> Sequence; + /// Ordered list of DAG postprocessing steps. + std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; + /// The index in BB of RegionEnd. /// /// This is the instruction number from the top of the current block, not @@ -169,13 +178,16 @@ namespace { /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. /// - void Observe(MachineInstr *MI, unsigned Count); + void Observe(MachineInstr &MI, unsigned Count); /// finishBlock - Clean up register live-range state. /// void finishBlock() override; private: + /// Apply each ScheduleDAGMutation step in order. + void postprocessDAG(); + void ReleaseSucc(SUnit *SU, SDep *SuccEdge); void ReleaseSuccessors(SUnit *SU); void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); @@ -203,6 +215,7 @@ SchedulePostRATDList::SchedulePostRATDList( HazardRec = MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer( InstrItins, this); + MF.getSubtarget().getPostRAMutations(Mutations); assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE || MRI.tracksLiveness()) && @@ -257,12 +270,17 @@ bool PostRAScheduler::enablePostRAScheduler( TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const { Mode = ST.getAntiDepBreakMode(); ST.getCriticalPathRCs(CriticalPathRCs); + + // Check for explicit enable/disable of post-ra scheduling. 
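The explicit enable/disable check moved into enablePostRAScheduler relies on cl::opt position tracking: getPosition() is nonzero only when the flag actually appeared on the command line, which is what lets an explicit -enable-post-RA-scheduler=false win over the target hook. A minimal sketch of the same idiom with a hypothetical flag name:

  #include "llvm/Support/CommandLine.h"
  using namespace llvm;

  // Hypothetical flag, standing in for EnablePostRAScheduler.
  static cl::opt<bool> EnableFoo("enable-foo", cl::init(false),
                                 cl::desc("Force foo on or off"));

  static bool shouldRunFoo(bool TargetDefault) {
    if (EnableFoo.getPosition() > 0) // the flag was typed on the command line
      return EnableFoo;              // honor it, whether true or false
    return TargetDefault;            // otherwise defer to the target hooks
  }

  int main(int argc, char **argv) {
    cl::ParseCommandLineOptions(argc, argv);
    return shouldRunFoo(/*TargetDefault=*/true) ? 0 : 1;
  }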
+ if (EnablePostRAScheduler.getPosition() > 0) + return EnablePostRAScheduler; + return ST.enablePostRAScheduler() && OptLevel >= ST.getOptLevelToEnablePostRAScheduler(); } bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { - if (skipOptnoneFunction(*Fn.getFunction())) + if (skipFunction(*Fn.getFunction())) return false; TII = Fn.getSubtarget().getInstrInfo(); @@ -272,20 +290,15 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { RegClassInfo.runOnMachineFunction(Fn); - // Check for explicit enable/disable of post-ra scheduling. TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE; SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs; - if (EnablePostRAScheduler.getPosition() > 0) { - if (!EnablePostRAScheduler) - return false; - } else { - // Check that post-RA scheduling is enabled for this target. - // This may upgrade the AntiDepMode. - if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(), - AntiDepMode, CriticalPathRCs)) - return false; - } + + // Check that post-RA scheduling is enabled for this target. + // This may upgrade the AntiDepMode. + if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(), + AntiDepMode, CriticalPathRCs)) + return false; // Check for antidep breaking override... if (EnableAntiDepBreaking.getPosition() > 0) { @@ -322,24 +335,24 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::iterator Current = MBB.end(); unsigned Count = MBB.size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) { - MachineInstr *MI = std::prev(I); + MachineInstr &MI = *std::prev(I); --Count; // Calls are not scheduling boundaries before register allocation, but // post-ra we don't gain anything by scheduling across calls since we // don't need to worry about register pressure. - if (MI->isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) { + if (MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) { Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count); Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); Scheduler.EmitSchedule(); - Current = MI; + Current = &MI; CurrentCount = Count; Scheduler.Observe(MI, CurrentCount); } I = MI; - if (MI->isBundle()) - Count -= MI->getBundleSize(); + if (MI.isBundle()) + Count -= MI.getBundleSize(); } assert(Count == 0 && "Instruction count mismatch!"); assert((MBB.begin() == Current || CurrentCount != 0) && @@ -398,6 +411,8 @@ void SchedulePostRATDList::schedule() { } } + postprocessDAG(); + DEBUG(dbgs() << "********** List Scheduling **********\n"); DEBUG( for (const SUnit &SU : SUnits) { @@ -414,7 +429,7 @@ void SchedulePostRATDList::schedule() { /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. /// -void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { +void SchedulePostRATDList::Observe(MachineInstr &MI, unsigned Count) { if (AntiDepBreak) AntiDepBreak->Observe(MI, Count, EndIndex); } @@ -429,6 +444,12 @@ void SchedulePostRATDList::finishBlock() { ScheduleDAGInstrs::finishBlock(); } +/// Apply each ScheduleDAGMutation step in order. 
+void SchedulePostRATDList::postprocessDAG() { + for (auto &M : Mutations) + M->apply(this); +} + //===----------------------------------------------------------------------===// // Top-Down Scheduling //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp new file mode 100644 index 0000000..fbc2bc6 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -0,0 +1,94 @@ +//===-- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements IR lowering for the llvm.load.relative intrinsic. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/PreISelIntrinsicLowering.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace { + +bool lowerLoadRelative(Function &F) { + if (F.use_empty()) + return false; + + bool Changed = false; + Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Type *Int32PtrTy = Int32Ty->getPointerTo(); + Type *Int8Ty = Type::getInt8Ty(F.getContext()); + + for (auto I = F.use_begin(), E = F.use_end(); I != E;) { + auto CI = dyn_cast<CallInst>(I->getUser()); + ++I; + if (!CI || CI->getCalledValue() != &F) + continue; + + IRBuilder<> B(CI); + Value *OffsetPtr = + B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1)); + Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy); + Value *OffsetI32 = B.CreateAlignedLoad(OffsetPtrI32, 4); + + Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32); + + CI->replaceAllUsesWith(ResultPtr); + CI->eraseFromParent(); + Changed = true; + } + + return Changed; +} + +bool lowerIntrinsics(Module &M) { + bool Changed = false; + for (Function &F : M) { + if (F.getName().startswith("llvm.load.relative.")) + Changed |= lowerLoadRelative(F); + } + return Changed; +} + +class PreISelIntrinsicLoweringLegacyPass : public ModulePass { +public: + static char ID; + PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {} + + bool runOnModule(Module &M) { return lowerIntrinsics(M); } +}; + +char PreISelIntrinsicLoweringLegacyPass::ID; +} + +INITIALIZE_PASS(PreISelIntrinsicLoweringLegacyPass, + "pre-isel-intrinsic-lowering", "Pre-ISel Intrinsic Lowering", + false, false) + +namespace llvm { +ModulePass *createPreISelIntrinsicLoweringPass() { + return new PreISelIntrinsicLoweringLegacyPass; +} + +PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M, + ModuleAnalysisManager &AM) { + if (!lowerIntrinsics(M)) + return PreservedAnalyses::all(); + else + return PreservedAnalyses::none(); +} +} // End llvm namespace diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 939c500..20a9a39 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/IndexedMap.h" #include 
"llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -35,7 +34,6 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" @@ -49,48 +47,83 @@ using namespace llvm; #define DEBUG_TYPE "pei" +typedef SmallVector<MachineBasicBlock *, 4> MBBVector; +static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, + unsigned &MinCSFrameIndex, + unsigned &MaxCXFrameIndex, + const MBBVector &SaveBlocks, + const MBBVector &RestoreBlocks); + +static void doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS); + namespace { class PEI : public MachineFunctionPass { public: static char ID; - PEI() : MachineFunctionPass(ID) { + explicit PEI(const TargetMachine *TM = nullptr) : MachineFunctionPass(ID) { initializePEIPass(*PassRegistry::getPassRegistry()); + + if (TM && (!TM->usesPhysRegsForPEI())) { + SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *, + unsigned &, unsigned &, const MBBVector &, + const MBBVector &) {}; + ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {}; + } else { + SpillCalleeSavedRegisters = doSpillCalleeSavedRegs; + ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs; + UsesCalleeSaves = true; + } } void getAnalysisUsage(AnalysisUsage &AU) const override; + MachineFunctionProperties getRequiredProperties() const override { + MachineFunctionProperties MFP; + if (UsesCalleeSaves) + MFP.set(MachineFunctionProperties::Property::AllVRegsAllocated); + return MFP; + } + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// bool runOnMachineFunction(MachineFunction &Fn) override; private: + std::function<void(MachineFunction &MF, RegScavenger *RS, + unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex, + const MBBVector &SaveBlocks, + const MBBVector &RestoreBlocks)> + SpillCalleeSavedRegisters; + std::function<void(MachineFunction &MF, RegScavenger *RS)> + ScavengeFrameVirtualRegs; + + bool UsesCalleeSaves = false; + RegScavenger *RS; // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved // stack frame indexes. - unsigned MinCSFrameIndex, MaxCSFrameIndex; + unsigned MinCSFrameIndex = std::numeric_limits<unsigned>::max(); + unsigned MaxCSFrameIndex = 0; // Save and Restore blocks of the current function. Typically there is a // single save block, unless Windows EH funclets are involved. - SmallVector<MachineBasicBlock *, 1> SaveBlocks; - SmallVector<MachineBasicBlock *, 4> RestoreBlocks; + MBBVector SaveBlocks; + MBBVector RestoreBlocks; // Flag to control whether to use the register scavenger to resolve // frame index materialization registers. Set according to // TRI->requiresFrameIndexScavenging() for the current function. 
bool FrameIndexVirtualScavenging; - void calculateSets(MachineFunction &Fn); - void calculateCallsInformation(MachineFunction &Fn); - void assignCalleeSavedSpillSlots(MachineFunction &Fn, - const BitVector &SavedRegs); - void insertCSRSpillsAndRestores(MachineFunction &Fn); + void calculateCallFrameInfo(MachineFunction &Fn); + void calculateSaveRestoreBlocks(MachineFunction &Fn); + void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, int &SPAdj); - void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); }; } // namespace @@ -103,15 +136,19 @@ WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1), cl::desc("Warn for stack size bigger than the given" " number")); -INITIALIZE_PASS_BEGIN(PEI, "prologepilog", - "Prologue/Epilogue Insertion", false, false) +INITIALIZE_TM_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", + false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) -INITIALIZE_PASS_END(PEI, "prologepilog", - "Prologue/Epilogue Insertion & Frame Finalization", - false, false) +INITIALIZE_TM_PASS_END(PEI, "prologepilog", + "Prologue/Epilogue Insertion & Frame Finalization", + false, false) + +MachineFunctionPass * +llvm::createPrologEpilogInserterPass(const TargetMachine *TM) { + return new PEI(TM); +} STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, @@ -122,40 +159,9 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<StackProtector>(); - AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } -/// Compute the set of return blocks -void PEI::calculateSets(MachineFunction &Fn) { - const MachineFrameInfo *MFI = Fn.getFrameInfo(); - - // Even when we do not change any CSR, we still want to insert the - // prologue and epilogue of the function. - // So set the save points for those. - - // Use the points found by shrink-wrapping, if any. - if (MFI->getSavePoint()) { - SaveBlocks.push_back(MFI->getSavePoint()); - assert(MFI->getRestorePoint() && "Both restore and save must be set"); - MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); - // If RestoreBlock does not have any successor and is not a return block - // then the end point is unreachable and we do not need to insert any - // epilogue. - if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) - RestoreBlocks.push_back(RestoreBlock); - return; - } - - // Save refs to entry and return blocks. - SaveBlocks.push_back(&Fn.front()); - for (MachineBasicBlock &MBB : Fn) { - if (MBB.isEHFuncletEntry()) - SaveBlocks.push_back(&MBB); - if (MBB.isReturnBlock()) - RestoreBlocks.push_back(&MBB); - } -} /// StackObjSet - A set of stack object indexes typedef SmallSetVector<int, 8> StackObjSet; @@ -168,30 +174,21 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); - RS = TRI->requiresRegisterScavenging(Fn) ? 
new RegScavenger() : nullptr;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
// function's frame information. Also eliminates call frame pseudo
// instructions.
- calculateCallsInformation(Fn);
+ calculateCallFrameInfo(Fn);
- // Determine which of the registers in the callee save list should be saved.
- BitVector SavedRegs;
- TFI->determineCalleeSaves(Fn, SavedRegs, RS);
-
- // Insert spill code for any callee saved registers that are modified.
- assignCalleeSavedSpillSlots(Fn, SavedRegs);
-
- // Determine placement of CSR spill/restore code:
+ // Determine placement of CSR spill/restore code and prolog/epilog code:
// place all spills in the entry block, all restores in return blocks.
- calculateSets(Fn);
+ calculateSaveRestoreBlocks(Fn);
- // Add the code to save and restore the callee saved registers.
- if (!F->hasFnAttribute(Attribute::Naked))
- insertCSRSpillsAndRestores(Fn);
+ // Handle CSR spilling and restoring, for targets that need it.
+ SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex,
+ SaveBlocks, RestoreBlocks);
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
@@ -216,11 +213,12 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// If register scavenging is needed, as we've enabled doing it as a
// post-pass, scavenge the virtual registers that frame index elimination
// inserted.
- if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
- scavengeFrameVirtualRegs(Fn);
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
+ ScavengeFrameVirtualRegs(Fn, RS);
- // Clear any vregs created by virtual scavenging.
- Fn.getRegInfo().clearVirtRegs();
+ // Clear any vregs created by virtual scavenging.
+ Fn.getRegInfo().clearVirtRegs();
+ }
// Warn on stack size when it exceeds the given limit.
MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -233,13 +231,15 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
delete RS;
SaveBlocks.clear();
RestoreBlocks.clear();
+ MFI->setSavePoint(nullptr);
+ MFI->setRestorePoint(nullptr);
return true;
}
-/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// Calculate the MaxCallFrameSize and AdjustsStack
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
-void PEI::calculateCallsInformation(MachineFunction &Fn) {
+void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -290,12 +290,42 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
}
}
-void PEI::assignCalleeSavedSpillSlots(MachineFunction &F,
- const BitVector &SavedRegs) {
- // These are used to keep track the callee-save area. Initialize them.
- MinCSFrameIndex = INT_MAX;
- MaxCSFrameIndex = 0;
+/// Compute the sets of entry and return blocks for saving and restoring
+/// callee-saved registers, and placing prolog and epilog code.
+void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) {
+ const MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Even when we do not change any CSR, we still want to insert the
+ // prologue and epilogue of the function.
+ // So set the save points for those.
+ // Use the points found by shrink-wrapping, if any.
+ if (MFI->getSavePoint()) { + SaveBlocks.push_back(MFI->getSavePoint()); + assert(MFI->getRestorePoint() && "Both restore and save must be set"); + MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); + // If RestoreBlock does not have any successor and is not a return block + // then the end point is unreachable and we do not need to insert any + // epilogue. + if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) + RestoreBlocks.push_back(RestoreBlock); + return; + } + + // Save refs to entry and return blocks. + SaveBlocks.push_back(&Fn.front()); + for (MachineBasicBlock &MBB : Fn) { + if (MBB.isEHFuncletEntry()) + SaveBlocks.push_back(&MBB); + if (MBB.isReturnBlock()) + RestoreBlocks.push_back(&MBB); + } +} + +static void assignCalleeSavedSpillSlots(MachineFunction &F, + const BitVector &SavedRegs, + unsigned &MinCSFrameIndex, + unsigned &MaxCSFrameIndex) { if (SavedRegs.empty()) return; @@ -323,14 +353,13 @@ void PEI::assignCalleeSavedSpillSlots(MachineFunction &F, // Now that we know which registers need to be saved and restored, allocate // stack slots for them. - for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end(); - I != E; ++I) { - unsigned Reg = I->getReg(); + for (auto &CS : CSI) { + unsigned Reg = CS.getReg(); const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); int FrameIdx; if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { - I->setFrameIdx(FrameIdx); + CS.setFrameIdx(FrameIdx); continue; } @@ -359,7 +388,7 @@ void PEI::assignCalleeSavedSpillSlots(MachineFunction &F, MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); } - I->setFrameIdx(FrameIdx); + CS.setFrameIdx(FrameIdx); } } @@ -427,7 +456,9 @@ static void updateLiveness(MachineFunction &MF) { /// insertCSRSpillsAndRestores - Insert spill and restore code for /// callee saved registers used in the function. /// -void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { +static void insertCSRSpillsAndRestores(MachineFunction &Fn, + const MBBVector &SaveBlocks, + const MBBVector &RestoreBlocks) { // Get callee saved register information. MachineFrameInfo *MFI = Fn.getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); @@ -496,6 +527,28 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { } } +static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS, + unsigned &MinCSFrameIndex, + unsigned &MaxCSFrameIndex, + const MBBVector &SaveBlocks, + const MBBVector &RestoreBlocks) { + const Function *F = Fn.getFunction(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + MinCSFrameIndex = std::numeric_limits<unsigned>::max(); + MaxCSFrameIndex = 0; + + // Determine which of the registers in the callee save list should be saved. + BitVector SavedRegs; + TFI->determineCalleeSaves(Fn, SavedRegs, RS); + + // Assign stack slots for any callee-saved registers that must be spilled. + assignCalleeSavedSpillSlots(Fn, SavedRegs, MinCSFrameIndex, MaxCSFrameIndex); + + // Add the code to save and restore the callee saved registers. + if (!F->hasFnAttribute(Attribute::Naked)) + insertCSRSpillsAndRestores(Fn, SaveBlocks, RestoreBlocks); +} + /// AdjustStackOffset - Helper function used to adjust the stack frame offset. static inline void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, @@ -512,7 +565,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary. 
- Offset = RoundUpToAlignment(Offset, Align, Skew); + Offset = alignTo(Offset, Align, Skew); if (StackGrowsDown) { DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); @@ -524,6 +577,108 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, } } +/// Compute which bytes of fixed and callee-save stack area are unused and keep +/// track of them in StackBytesFree. +/// +static inline void +computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown, + unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex, + int64_t FixedCSEnd, BitVector &StackBytesFree) { + // Avoid undefined int64_t -> int conversion below in extreme case. + if (FixedCSEnd > std::numeric_limits<int>::max()) + return; + + StackBytesFree.resize(FixedCSEnd, true); + + SmallVector<int, 16> AllocatedFrameSlots; + // Add fixed objects. + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) + AllocatedFrameSlots.push_back(i); + // Add callee-save objects. + for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i) + AllocatedFrameSlots.push_back(i); + + for (int i : AllocatedFrameSlots) { + // These are converted from int64_t, but they should always fit in int + // because of the FixedCSEnd check above. + int ObjOffset = MFI->getObjectOffset(i); + int ObjSize = MFI->getObjectSize(i); + int ObjStart, ObjEnd; + if (StackGrowsDown) { + // ObjOffset is negative when StackGrowsDown is true. + ObjStart = -ObjOffset - ObjSize; + ObjEnd = -ObjOffset; + } else { + ObjStart = ObjOffset; + ObjEnd = ObjOffset + ObjSize; + } + // Ignore fixed holes that are in the previous stack frame. + if (ObjEnd > 0) + StackBytesFree.reset(ObjStart, ObjEnd); + } +} + +/// Assign frame object to an unused portion of the stack in the fixed stack +/// object range. Return true if the allocation was successful. +/// +static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx, + bool StackGrowsDown, unsigned MaxAlign, + BitVector &StackBytesFree) { + if (MFI->isVariableSizedObjectIndex(FrameIdx)) + return false; + + if (StackBytesFree.none()) { + // clear it to speed up later scavengeStackSlot calls to + // StackBytesFree.none() + StackBytesFree.clear(); + return false; + } + + unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx); + if (ObjAlign > MaxAlign) + return false; + + int64_t ObjSize = MFI->getObjectSize(FrameIdx); + int FreeStart; + for (FreeStart = StackBytesFree.find_first(); FreeStart != -1; + FreeStart = StackBytesFree.find_next(FreeStart)) { + + // Check that free space has suitable alignment. + unsigned ObjStart = StackGrowsDown ? FreeStart + ObjSize : FreeStart; + if (alignTo(ObjStart, ObjAlign) != ObjStart) + continue; + + if (FreeStart + ObjSize > StackBytesFree.size()) + return false; + + bool AllBytesFree = true; + for (unsigned Byte = 0; Byte < ObjSize; ++Byte) + if (!StackBytesFree.test(FreeStart + Byte)) { + AllBytesFree = false; + break; + } + if (AllBytesFree) + break; + } + + if (FreeStart == -1) + return false; + + if (StackGrowsDown) { + int ObjStart = -(FreeStart + ObjSize); + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart + << "]\n"); + MFI->setObjectOffset(FrameIdx, ObjStart); + } else { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart + << "]\n"); + MFI->setObjectOffset(FrameIdx, FreeStart); + } + + StackBytesFree.reset(FreeStart, FreeStart + ObjSize); + return true; +} + /// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., /// those required to be close to the Stack Protector) to stack offsets. 
static void @@ -568,9 +723,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. - // We currently don't support filling in holes in between fixed sized - // objects, so we adjust 'Offset' to point to the end of last fixed sized - // preallocated object. + // Adjust 'Offset' to point to the end of the last fixed sized preallocated + // object. for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { int64_t FixedOff; if (StackGrowsDown) { @@ -596,22 +750,27 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = RoundUpToAlignment(Offset, Align, Skew); + Offset = alignTo(Offset, Align, Skew); + DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n"); MFI->setObjectOffset(i, -Offset); // Set the computed offset } - } else { - int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; - for (int i = MaxCSFI; i >= MinCSFI ; --i) { + } else if (MaxCSFrameIndex >= MinCSFrameIndex) { + // Be careful about underflow in comparisons against MinCSFrameIndex. + for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = RoundUpToAlignment(Offset, Align, Skew); + Offset = alignTo(Offset, Align, Skew); + DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n"); MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); } } + // FixedCSEnd is the stack offset to the end of the fixed and callee-save + // stack area. + int64_t FixedCSEnd = Offset; unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the @@ -638,7 +797,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. - Offset = RoundUpToAlignment(Offset, Align, Skew); + Offset = alignTo(Offset, Align, Skew); DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); @@ -656,6 +815,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { MaxAlign = std::max(Align, MaxAlign); } + // Retrieve the Exception Handler registration node. + int EHRegNodeFrameIndex = INT_MAX; + if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo()) + EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex; + // Make sure that the stack protector comes before the local variables on the // stack. SmallSet<int, 16> ProtectedObjs; @@ -678,7 +842,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->isDeadObjectIndex(i)) continue; - if (MFI->getStackProtectorIndex() == (int)i) + if (MFI->getStackProtectorIndex() == (int)i || + EHRegNodeFrameIndex == (int)i) continue; switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { @@ -705,8 +870,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { Offset, MaxAlign, Skew); } - // Then assign frame offsets to stack objects that are not used to spill - // callee saved registers. + SmallVector<int, 8> ObjectsToAllocate; + + // Then prepare to assign frame offsets to stack objects that are not used to + // spill callee saved registers.
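One subtlety in the hunk above deserves a note: the callee-save loop now counts down with unsigned indices, and with unsigned arithmetic the natural condition i >= MinCSFrameIndex can never become false when MinCSFrameIndex is 0. A small sketch of why the rewritten bound terminates (assuming the caller has already checked MaxCSFrameIndex >= MinCSFrameIndex, as the new code does):

#include <cstdio>

// Visit [Min, Max] in descending order with unsigned indices.
// "i >= Min" would loop forever when Min == 0; comparing against
// Min - 1 instead uses well-defined unsigned wraparound as the stop
// value (Min - 1 wraps to UINT_MAX when Min == 0, and so does i after
// visiting 0).
void visitDescending(unsigned Min, unsigned Max) {
  for (unsigned i = Max; i != Min - 1; --i)
    printf("frame index %u\n", i);
}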
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { if (MFI->isObjectPreAllocated(i) && MFI->getUseLocalStackAllocationBlock()) @@ -717,14 +884,43 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (MFI->isDeadObjectIndex(i)) continue; - if (MFI->getStackProtectorIndex() == (int)i) + if (MFI->getStackProtectorIndex() == (int)i || + EHRegNodeFrameIndex == (int)i) continue; if (ProtectedObjs.count(i)) continue; - AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); + // Add the objects that we need to allocate to our working set. + ObjectsToAllocate.push_back(i); } + // Allocate the EH registration node first if one is present. + if (EHRegNodeFrameIndex != INT_MAX) + AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset, + MaxAlign, Skew); + + // Give the targets a chance to order the objects the way they like it. + if (Fn.getTarget().getOptLevel() != CodeGenOpt::None && + Fn.getTarget().Options.StackSymbolOrdering) + TFI.orderFrameObjects(Fn, ObjectsToAllocate); + + // Keep track of which bytes in the fixed and callee-save range are used so we + // can use the holes when allocating later stack objects. Only do this if + // stack protector isn't being used and the target requests it and we're + // optimizing. + BitVector StackBytesFree; + if (!ObjectsToAllocate.empty() && + Fn.getTarget().getOptLevel() != CodeGenOpt::None && + MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn)) + computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex, + FixedCSEnd, StackBytesFree); + + // Now walk the objects and actually assign base offsets to them. + for (auto &Object : ObjectsToAllocate) + if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign, + StackBytesFree)) + AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew); + // Make sure the special register scavenging spill slot is closest to the // stack pointer. if (RS && !EarlyScavengingSlots) { @@ -757,7 +953,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); - Offset = RoundUpToAlignment(Offset, StackAlign, Skew); + Offset = alignTo(Offset, StackAlign, Skew); } // Update frame info to pretend that this is part of the stack... @@ -851,7 +1047,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); - if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(*BB); bool InsideCallSequence = false; @@ -860,38 +1056,31 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { InsideCallSequence = (I->getOpcode() == FrameSetupOpcode); - SPAdj += TII.getSPAdjust(I); - - MachineBasicBlock::iterator PrevI = BB->end(); - if (I != BB->begin()) PrevI = std::prev(I); - TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); + SPAdj += TII.getSPAdjust(*I); - // Visit the instructions created by eliminateCallFramePseudoInstr(). - if (PrevI == BB->end()) - I = BB->begin(); // The replaced instr was the first in the block. 
- else - I = std::next(PrevI); + I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); continue; } - MachineInstr *MI = I; + MachineInstr &MI = *I; bool DoIncr = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (!MI->getOperand(i).isFI()) + bool DidFinishLoop = true; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (!MI.getOperand(i).isFI()) continue; // Frame indices in debug values are encoded in a target independent // way with simply the frame index and offset rather than any // target-specific addressing mode. - if (MI->isDebugValue()) { + if (MI.isDebugValue()) { assert(i == 0 && "Frame indices can only appear as the first " "operand of a DBG_VALUE machine instruction"); unsigned Reg; - MachineOperand &Offset = MI->getOperand(1); - Offset.setImm(Offset.getImm() + - TFI->getFrameIndexReference( - Fn, MI->getOperand(0).getIndex(), Reg)); - MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + MachineOperand &Offset = MI.getOperand(1); + Offset.setImm( + Offset.getImm() + + TFI->getFrameIndexReference(Fn, MI.getOperand(0).getIndex(), Reg)); + MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/); continue; } @@ -900,18 +1089,16 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, // implementation other than historical accident. The only // remaining difference is the unconditional use of the stack // pointer as the base register. - if (MI->getOpcode() == TargetOpcode::STATEPOINT) { - assert((!MI->isDebugValue() || i == 0) && + if (MI.getOpcode() == TargetOpcode::STATEPOINT) { + assert((!MI.isDebugValue() || i == 0) && "Frame indices can only appear as the first operand of a " "DBG_VALUE machine instruction"); unsigned Reg; - MachineOperand &Offset = MI->getOperand(i + 1); - const unsigned refOffset = - TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(), - Reg); - + MachineOperand &Offset = MI.getOperand(i + 1); + int refOffset = TFI->getFrameIndexReferencePreferSP( + Fn, MI.getOperand(i).getIndex(), Reg, /*IgnoreSPUpdates*/ false); Offset.setImm(Offset.getImm() + refOffset); - MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/); + MI.getOperand(i).ChangeToRegister(Reg, false /*isDef*/); continue; } @@ -937,7 +1124,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, DoIncr = false; } - MI = nullptr; + DidFinishLoop = false; break; } @@ -948,45 +1135,46 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, // Note that this must come after eliminateFrameIndex, because // if I itself referred to a frame index, we shouldn't count its own // adjustment. - if (MI && InsideCallSequence) + if (DidFinishLoop && InsideCallSequence) SPAdj += TII.getSPAdjust(MI); if (DoIncr && I != BB->end()) ++I; // Update register states. - if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + if (RS && !FrameIndexVirtualScavenging && DidFinishLoop) + RS->forward(MI); } } -/// scavengeFrameVirtualRegs - Replace all frame index virtual registers +/// doScavengeFrameVirtualRegs - Replace all frame index virtual registers /// with physical registers. Use the register scavenger to find an /// appropriate register to use. /// /// FIXME: Iterating over the instruction stream is unnecessary. We can simply /// iterate over the vreg use list, which at this point only contains machine /// operands for which eliminateFrameIndex needs a new scratch reg.
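Stepping back to replaceFrameIndices above: SPAdj tracks the running stack-pointer displacement inside a call sequence so that frame-index references between the setup and destroy pseudos can be corrected. A toy model of that bookkeeping (hypothetical opcodes, not LLVM's):

#include <cstdio>

// Toy model: call-frame setup/destroy pseudos move SP, and any
// frame-index reference in between must be adjusted by the running
// delta before it can be rewritten to an SP-relative address.
enum OpKind { CallFrameSetup, CallFrameDestroy, FrameIndexRef };
struct ToyInst { OpKind Kind; int Bytes; };

void trackSPAdjust(const ToyInst *Insts, int Count) {
  int SPAdj = 0;
  for (int i = 0; i < Count; ++i) {
    switch (Insts[i].Kind) {
    case CallFrameSetup:   SPAdj += Insts[i].Bytes; break;
    case CallFrameDestroy: SPAdj -= Insts[i].Bytes; break;
    case FrameIndexRef:
      printf("correct FI offset by %d bytes\n", SPAdj);
      break;
    }
  }
}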
-void -PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { +static void +doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) { // Run through the instructions and find any virtual registers. - for (MachineFunction::iterator BB = Fn.begin(), - E = Fn.end(); BB != E; ++BB) { - RS->enterBasicBlock(&*BB); + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (MachineBasicBlock &MBB : MF) { + RS->enterBasicBlock(MBB); int SPAdj = 0; - // The instruction stream may change in the loop, so check BB->end() + // The instruction stream may change in the loop, so check MBB.end() // directly. - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { // We might end up here again with a NULL iterator if we scavenged a // register for which we inserted spill code for definition by what was - // originally the first instruction in BB. + // originally the first instruction in MBB. if (I == MachineBasicBlock::iterator(nullptr)) - I = BB->begin(); + I = MBB.begin(); - MachineInstr *MI = I; + const MachineInstr &MI = *I; MachineBasicBlock::iterator J = std::next(I); MachineBasicBlock::iterator P = - I == BB->begin() ? MachineBasicBlock::iterator(nullptr) + I == MBB.begin() ? MachineBasicBlock::iterator(nullptr) : std::prev(I); // RS should process this instruction before we might scavenge at this @@ -995,35 +1183,31 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // instruction are available, and defined registers are not. RS->forward(I); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (MI->getOperand(i).isReg()) { - MachineOperand &MO = MI->getOperand(i); - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - // When we first encounter a new virtual register, it - // must be a definition. - assert(MI->getOperand(i).isDef() && - "frame index virtual missing def!"); - // Scavenge a new scratch register - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); - - ++NumScavengedRegs; - - // Replace this reference to the virtual register with the - // scratch register. - assert (ScratchReg && "Missing scratch register!"); - Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); - - // Because this instruction was processed by the RS before this - // register was allocated, make sure that the RS now records the - // register as being used. - RS->setRegUsed(ScratchReg); - } + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // When we first encounter a new virtual register, it + // must be a definition. + assert(MO.isDef() && "frame index virtual missing def!"); + // Scavenge a new scratch register + const TargetRegisterClass *RC = MRI.getRegClass(Reg); + unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); + + ++NumScavengedRegs; + + // Replace this reference to the virtual register with the + // scratch register. + assert(ScratchReg && "Missing scratch register!"); + MRI.replaceRegWith(Reg, ScratchReg); + + // Because this instruction was processed by the RS before this + // register was allocated, make sure that the RS now records the + // register as being used. 
+ RS->setRegUsed(ScratchReg); } // If the scavenger needed to use one of its spill slots, the @@ -1031,7 +1215,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // problem because we need the spill code before I: Move I to just // prior to J. if (I != std::prev(J)) { - BB->splice(J, &*BB, I); + MBB.splice(J, &MBB, I); // Before we move I, we need to prepare the RS to visit I again. // Specifically, RS will assert if it sees uses of registers that diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp index 1f46417..804a4c3 100644 --- a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp +++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp @@ -11,16 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" #include "llvm/Support/raw_ostream.h" -#include <map> using namespace llvm; static const char *const PSVNames[] = { diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp index 16ff48e..93eeb9c 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -22,9 +22,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Target/TargetRegisterInfo.h" -#ifndef NDEBUG -#include "llvm/ADT/SparseBitVector.h" -#endif #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -153,3 +150,12 @@ void RegAllocBase::allocatePhysRegs() { } } } + +void RegAllocBase::postOptimization() { + spiller().postOptimization(); + for (auto DeadInst : DeadRemats) { + LIS->RemoveMachineInstrFromMaps(*DeadInst); + DeadInst->eraseFromParent(); + } + DeadRemats.clear(); +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h index 659b8f5..296ffe8 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.h +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h @@ -65,6 +65,12 @@ protected: LiveRegMatrix *Matrix; RegisterClassInfo RegClassInfo; + /// An instruction that defines an original reg and whose defs are already + /// all dead after remat is saved in DeadRemats. The deletion of such an + /// instruction is postponed until all the allocations are done, so its + /// remat expression is always available for the remat of all the siblings + /// of the original reg. + SmallPtrSet<MachineInstr *, 32> DeadRemats; + RegAllocBase() : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {} @@ -77,6 +83,10 @@ protected: // physical register assignments. void allocatePhysRegs(); + // Run the spiller's post-optimization and remove dead defs left behind by + // rematerialization. + virtual void postOptimization(); + // Get a temporary reference to a Spiller instance. virtual Spiller &spiller() = 0; diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index cfe367d..11dfda6 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -199,7 +199,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, Matrix->unassign(Spill); // Spill the extracted interval.
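The DeadRemats set documented above defers erasing rematerialization sources whose defs have all gone dead until allocation finishes, so sibling virtual registers can still rematerialize from them; the hunks that follow thread the set into each LiveRangeEdit. A minimal sketch of the deferral pattern (hypothetical toy types, not the LLVM classes):

#include <set>

// Deferred deletion: dead remat sources stay in the function during
// allocation (their remat expression may still be needed) and are
// erased in one pass afterwards.
struct ToyInstr {
  bool Erased = false;
  void eraseFromParent() { Erased = true; }
};

struct ToyAllocator {
  std::set<ToyInstr *> DeadRemats;

  void noteDeadRemat(ToyInstr *I) { DeadRemats.insert(I); } // Postpone.

  void postOptimization() {
    for (ToyInstr *I : DeadRemats)
      I->eraseFromParent(); // Safe now: no further remats can need I.
    DeadRemats.clear();
  }
};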
- LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM); + LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats); spiller().spill(LRE); } return true; @@ -258,7 +258,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM); + LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell @@ -283,6 +283,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); + postOptimization(); // Diagnostic output before rewriting DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index 8d7a721..55fb33e 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" @@ -25,13 +24,12 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> @@ -52,6 +50,7 @@ namespace { static char ID; RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), isBulkSpilling(false) {} + private: MachineFunction *MF; MachineRegisterInfo *MRI; @@ -159,6 +158,11 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } + MachineFunctionProperties getSetProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + private: bool runOnMachineFunction(MachineFunction &Fn) override; void AllocateBasicBlock(); @@ -174,7 +178,7 @@ namespace { void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); void usePhysReg(MachineOperand&); - void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); + void definePhysReg(MachineInstr &MI, unsigned PhysReg, RegState NewState); unsigned calcSpillCost(unsigned PhysReg) const; void assignVirtToPhysReg(LiveReg&, unsigned PhysReg); LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { @@ -184,11 +188,11 @@ namespace { return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); } LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg); - LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator, + LiveRegMap::iterator allocVirtReg(MachineInstr &MI, LiveRegMap::iterator, unsigned Hint); - LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum, + LiveRegMap::iterator defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); - LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, + LiveRegMap::iterator reloadVirtReg(MachineInstr &MI, unsigned OpNum, 
unsigned VirtReg, unsigned Hint); void spillAll(MachineBasicBlock::iterator MI); bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); @@ -280,7 +284,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the // instruction, not on the spill. - bool SpillKill = LR.LastUse != MI; + bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; LR.Dirty = false; DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI) << " in " << PrintReg(LR.PhysReg, TRI)); @@ -345,6 +349,11 @@ void RAFast::usePhysReg(MachineOperand &MO) { unsigned PhysReg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand"); + + // Ignore undef uses. + if (MO.isUndef()) + return; + markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { case regDisabled: @@ -404,7 +413,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { /// definePhysReg - Mark PhysReg as reserved or free after spilling any /// virtregs. This is very similar to defineVirtReg except the physreg is /// reserved instead of allocated. -void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, +void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg, RegState NewState) { markRegUsedInInstr(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { @@ -512,7 +521,7 @@ RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { } /// allocVirtReg - Allocate a physical register for VirtReg. -RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, +RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr &MI, LiveRegMap::iterator LRI, unsigned Hint) { const unsigned VirtReg = LRI->VirtReg; @@ -577,18 +586,19 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, } // Nothing we can do. Report an error and keep going with a bad allocation. - if (MI->isInlineAsm()) - MI->emitError("inline assembly requires more registers than available"); + if (MI.isInlineAsm()) + MI.emitError("inline assembly requires more registers than available"); else - MI->emitError("ran out of registers during register allocation"); + MI.emitError("ran out of registers during register allocation"); definePhysReg(MI, *AO.begin(), regFree); return assignVirtToPhysReg(VirtReg, *AO.begin()); } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. -RAFast::LiveRegMap::iterator -RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint) { +RAFast::LiveRegMap::iterator RAFast::defineVirtReg(MachineInstr &MI, + unsigned OpNum, + unsigned VirtReg, + unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; @@ -607,11 +617,11 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, } else if (LRI->LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. - if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) + if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) addKillFlag(*LRI); } assert(LRI->PhysReg && "Register not assigned"); - LRI->LastUse = MI; + LRI->LastUse = &MI; LRI->LastOpNum = OpNum; LRI->Dirty = true; markRegUsedInInstr(LRI->PhysReg); @@ -619,15 +629,16 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, } /// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. 
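For context on the spill bookkeeping being converted to references here: the fast allocator only emits a store when the register copy of a value is dirty, that is, newer than its stack slot. A simplified sketch of that decision (toy types, not RAFast's):

// Dirty-bit spill decision: a store is only needed when the register
// holds a value the stack slot does not yet have.
struct ToyLiveReg {
  unsigned PhysReg = 0;
  bool Dirty = false; // Register copy differs from the stack slot.
};

void spillIfNeeded(ToyLiveReg &LR) {
  if (LR.Dirty) {
    // ... emit a store of LR.PhysReg to the vreg's stack slot here ...
    LR.Dirty = false; // Stack slot is now up to date.
  }
  // The vreg-to-physreg mapping can be dropped either way.
}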
-RAFast::LiveRegMap::iterator -RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint) { +RAFast::LiveRegMap::iterator RAFast::reloadVirtReg(MachineInstr &MI, + unsigned OpNum, + unsigned VirtReg, + unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); - MachineOperand &MO = MI->getOperand(OpNum); + MachineOperand &MO = MI.getOperand(OpNum); if (New) { LRI = allocVirtReg(MI, LRI, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); @@ -662,7 +673,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, MO.setIsDead(false); } assert(LRI->PhysReg && "Register not assigned"); - LRI->LastUse = MI; + LRI->LastUse = &MI; LRI->LastOpNum = OpNum; markRegUsedInInstr(LRI->PhysReg); return LRI; @@ -728,7 +739,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { if (ThroughRegs.count(PhysRegState[*AI])) - definePhysReg(MI, *AI, regFree); + definePhysReg(*MI, *AI, regFree); } } @@ -744,7 +755,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue; DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " << DefIdx << ".\n"); - LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0); unsigned PhysReg = LRI->PhysReg; setPhysReg(MI, i, PhysReg); // Note: we don't update the def operand yet. That would cause the normal @@ -753,7 +764,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); // Reload the register, but don't assign to the operand just yet. // That would confuse the later phys-def processing pass. - LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, 0); PartialDefs.push_back(LRI->PhysReg); } } @@ -767,7 +778,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isEarlyClobber()) continue; // Note: defineVirtReg may invalidate MO. - LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, 0); unsigned PhysReg = LRI->PhysReg; if (setPhysReg(MI, i, PhysReg)) VirtDead.push_back(Reg); @@ -801,14 +812,14 @@ void RAFast::AllocateBasicBlock() { // Add live-in registers as live. for (const auto &LI : MBB->liveins()) if (MRI->isAllocatable(LI.PhysReg)) - definePhysReg(MII, LI.PhysReg, regReserved); + definePhysReg(*MII, LI.PhysReg, regReserved); SmallVector<unsigned, 8> VirtDead; SmallVector<MachineInstr*, 32> Coalesced; // Otherwise, sequentially allocate each instruction in the MBB. while (MII != MBB->end()) { - MachineInstr *MI = MII++; + MachineInstr *MI = &*MII++; const MCInstrDesc &MCID = MI->getDesc(); DEBUG({ dbgs() << "\n>> " << *MI << "Regs:"; @@ -943,8 +954,8 @@ void RAFast::AllocateBasicBlock() { if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { - definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? - regFree : regReserved); + definePhysReg(*MI, Reg, + (MO.isImplicit() || MO.isDead()) ? 
regFree : regReserved); hasEarlyClobbers = true; } else hasPhysDefs = true; @@ -977,7 +988,7 @@ void RAFast::AllocateBasicBlock() { unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (MO.isUse()) { - LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); + LiveRegMap::iterator LRI = reloadVirtReg(*MI, i, Reg, CopyDst); unsigned PhysReg = LRI->PhysReg; CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; if (setPhysReg(MI, i, PhysReg)) @@ -1027,10 +1038,10 @@ void RAFast::AllocateBasicBlock() { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (!MRI->isAllocatable(Reg)) continue; - definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); + definePhysReg(*MI, Reg, MO.isDead() ? regFree : regReserved); continue; } - LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); + LiveRegMap::iterator LRI = defineVirtReg(*MI, i, Reg, CopySrc); unsigned PhysReg = LRI->PhysReg; if (setPhysReg(MI, i, PhysReg)) { VirtDead.push_back(Reg); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index 945cb9e..c4d4b1e 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" @@ -33,6 +32,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -44,6 +44,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <queue> @@ -55,14 +56,14 @@ STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); STATISTIC(NumEvicted, "Number of interferences evicted"); -static cl::opt<SplitEditor::ComplementSpillMode> -SplitSpillMode("split-spill-mode", cl::Hidden, - cl::desc("Spill mode for splitting live ranges"), - cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), - clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), - clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), - clEnumValEnd), - cl::init(SplitEditor::SM_Partition)); +static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode( + "split-spill-mode", cl::Hidden, + cl::desc("Spill mode for splitting live ranges"), + cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), + clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), + clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), + clEnumValEnd), + cl::init(SplitEditor::SM_Speed)); static cl::opt<unsigned> LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, @@ -128,6 +129,7 @@ class RAGreedy : public MachineFunctionPass, EdgeBundles *Bundles; SpillPlacement *SpillPlacer; LiveDebugVariables *DebugVars; + AliasAnalysis *AA; // state std::unique_ptr<Spiller> SpillerInstance; @@ -954,22 +956,28 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, // Interference for the live-in value. 
if (BI.LiveIn) { - if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) - BC.Entry = SpillPlacement::MustSpill, ++Ins; - else if (Intf.first() < BI.FirstInstr) - BC.Entry = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.first() < BI.LastInstr) + if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) { + BC.Entry = SpillPlacement::MustSpill; + ++Ins; + } else if (Intf.first() < BI.FirstInstr) { + BC.Entry = SpillPlacement::PrefSpill; ++Ins; + } else if (Intf.first() < BI.LastInstr) { + ++Ins; + } } // Interference for the live-out value. if (BI.LiveOut) { - if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) - BC.Exit = SpillPlacement::MustSpill, ++Ins; - else if (Intf.last() > BI.LastInstr) - BC.Exit = SpillPlacement::PrefSpill, ++Ins; - else if (Intf.last() > BI.FirstInstr) + if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) { + BC.Exit = SpillPlacement::MustSpill; ++Ins; + } else if (Intf.last() > BI.LastInstr) { + BC.Exit = SpillPlacement::PrefSpill; + ++Ins; + } else if (Intf.last() > BI.FirstInstr) { + ++Ins; + } } // Accumulate the total frequency of inserted spill code. @@ -1392,8 +1400,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, if (i == BestCand || !GlobalCand[i].PhysReg) continue; unsigned Count = GlobalCand[i].LiveBundles.count(); - if (Count < WorstCount) - Worst = i, WorstCount = Count; + if (Count < WorstCount) { + Worst = i; + WorstCount = Count; + } } --NumCands; GlobalCand[Worst] = GlobalCand[NumCands]; @@ -1457,7 +1467,7 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, SmallVectorImpl<unsigned> &NewVRegs) { SmallVector<unsigned, 8> UsedCands; // Prepare split editor. - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); SE->reset(LREdit, SplitSpillMode); // Assign all edge bundles to the preferred candidate, or NoCand. @@ -1505,7 +1515,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); SE->reset(LREdit, SplitSpillMode); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -1577,7 +1587,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Always enable split spill mode, since we're effectively spilling to a // register. 
- LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); SE->reset(LREdit, SplitEditor::SM_Size); ArrayRef<SlotIndex> Uses = SA->getUseSlots(); @@ -1900,7 +1910,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); SE->reset(LREdit); SE->openIntv(); @@ -2543,7 +2553,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, NewVRegs.push_back(VirtReg.reg); } else { NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); @@ -2583,6 +2593,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { Bundles = &getAnalysis<EdgeBundles>(); SpillPlacer = &getAnalysis<SpillPlacement>(); DebugVars = &getAnalysis<LiveDebugVariables>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); initializeCSRCost(); @@ -2591,7 +2602,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(LIS->dump()); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); - SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI)); + SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI)); ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; @@ -2601,6 +2612,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { allocatePhysRegs(); tryHintsRecoloring(); + postOptimization(); + releaseMemory(); return true; } diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index fd28b05..d1221ec 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -123,6 +123,12 @@ private: RegSet VRegsToAlloc, EmptyIntervalVRegs; + /// An instruction that defines an original reg and whose defs are already + /// all dead after remat is saved in DeadRemats. The deletion of such an + /// instruction is postponed until all the allocations are done, so its + /// remat expression is always available for the remat of all the siblings + /// of the original reg. + SmallPtrSet<MachineInstr *, 32> DeadRemats; + /// \brief Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); @@ -146,6 +152,7 @@ private: void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM) const; + void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS); }; char RegAllocPBQP::ID = 0; @@ -631,7 +638,8 @@ void RegAllocPBQP::spillVReg(unsigned VReg, VirtRegMap &VRM, Spiller &VRegSpiller) { VRegsToAlloc.erase(VReg); - LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM); + LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM, + nullptr, &DeadRemats); VRegSpiller.spill(LRE); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); @@ -713,6 +721,16 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, } } +void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) { + VRegSpiller.postOptimization(); + /// Remove dead defs left behind by rematerialization.
+ for (auto DeadInst : DeadRemats) { + LIS.RemoveMachineInstrFromMaps(*DeadInst); + DeadInst->eraseFromParent(); + } + DeadRemats.clear(); +} + static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr) { // All intervals have a spill weight that is mostly proportional to the number @@ -798,6 +816,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Finalise allocation, allocate empty ranges. finalizeAlloc(MF, LIS, VRM); + postOptimization(*VRegSpiller, LIS); VRegsToAlloc.clear(); EmptyIntervalVRegs.clear(); @@ -839,7 +858,7 @@ void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { } } -void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); } +LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); } void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const { OS << "graph {\n"; diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp new file mode 100644 index 0000000..50b8854 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -0,0 +1,142 @@ +//===-- RegUsageInfoCollector.cpp - Register Usage Information Collector --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This pass is required to take advantage of the interprocedural register +/// allocation infrastructure. +/// +/// This pass is a simple MachineFunction pass which collects register usage +/// details by iterating through each physical register and checking +/// MRI::isPhysRegModified(), then creates a RegMask based on these details.
+/// The pass then stores this RegMask in the PhysicalRegisterUsageInfo analysis. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterUsageInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" + +using namespace llvm; + +#define DEBUG_TYPE "ip-regalloc" + +STATISTIC(NumCSROpt, + "Number of functions optimized for callee saved registers"); + +namespace llvm { +void initializeRegUsageInfoCollectorPass(PassRegistry &); +} + +namespace { +class RegUsageInfoCollector : public MachineFunctionPass { +public: + RegUsageInfoCollector() : MachineFunctionPass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeRegUsageInfoCollectorPass(Registry); + } + + const char *getPassName() const override { + return "Register Usage Information Collector Pass"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + +private: + void markRegClobbered(const TargetRegisterInfo *TRI, uint32_t *RegMask, + unsigned PReg); +}; +} // end of anonymous namespace + +char RegUsageInfoCollector::ID = 0; + +INITIALIZE_PASS_BEGIN(RegUsageInfoCollector, "RegUsageInfoCollector", + "Register Usage Information Collector", false, false) +INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo) +INITIALIZE_PASS_END(RegUsageInfoCollector, "RegUsageInfoCollector", + "Register Usage Information Collector", false, false) + +FunctionPass *llvm::createRegUsageInfoCollector() { + return new RegUsageInfoCollector(); +} + +void RegUsageInfoCollector::markRegClobbered(const TargetRegisterInfo *TRI, + uint32_t *RegMask, unsigned PReg) { + // If PReg is clobbered then all of its aliases are also clobbered. + for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI) + RegMask[*AI / 32] &= ~(1u << *AI % 32); +} + +void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<PhysicalRegisterUsageInfo>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) { + MachineRegisterInfo *MRI = &MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const TargetMachine &TM = MF.getTarget(); + + DEBUG(dbgs() << " -------------------- " << getPassName() + << " -------------------- \n"); + DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n"); + + std::vector<uint32_t> RegMask; + + // Compute the size of the bit vector to represent all the registers. + // The bit vector is broken into 32-bit chunks, thus takes the ceil of + // the number of registers divided by 32 for the size.
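The register mask built next packs one preserved/clobbered bit per physical register into 32-bit words, which is also the indexing markRegClobbered uses above. A self-contained sketch of that layout (a toy stand-in, not LLVM's RegMask):

#include <cstdint>
#include <vector>

// One bit per physical register, packed into 32-bit words; a set bit
// means the register is preserved across the call.
struct ToyRegMask {
  std::vector<uint32_t> Words;
  explicit ToyRegMask(unsigned NumRegs)
      : Words((NumRegs + 31) / 32, 0xFFFFFFFFu) {} // Start all-preserved.

  void clobber(unsigned Reg) { Words[Reg / 32] &= ~(1u << (Reg % 32)); }
  bool isPreserved(unsigned Reg) const {
    return (Words[Reg / 32] >> (Reg % 32)) & 1u;
  }
};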
+ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; + RegMask.resize(RegMaskSize, 0xFFFFFFFF); + + const Function *F = MF.getFunction(); + + PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); + + PRUI->setTargetMachine(&TM); + + DEBUG(dbgs() << "Clobbered Registers: "); + + for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) + if (MRI->isPhysRegModified(PReg, true)) + markRegClobbered(TRI, &RegMask[0], PReg); + + if (!TargetFrameLowering::isSafeForNoCSROpt(F)) { + const uint32_t *CallPreservedMask = + TRI->getCallPreservedMask(MF, F->getCallingConv()); + // Set callee saved registers as preserved. + for (unsigned i = 0; i < RegMaskSize; ++i) + RegMask[i] = RegMask[i] | CallPreservedMask[i]; + } else { + ++NumCSROpt; + DEBUG(dbgs() << MF.getName() + << " function optimized for not having CSR.\n"); + } + + for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) + if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg)) + DEBUG(dbgs() << TRI->getName(PReg) << " "); + + DEBUG(dbgs() << " \n----------------------------------------\n"); + + PRUI->storeUpdateRegUsageInfo(F, std::move(RegMask)); + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp new file mode 100644 index 0000000..7595661 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -0,0 +1,131 @@ +//=--- RegUsageInfoPropagate.cpp - Register Usage Information Propagation --=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This pass is required to take advantage of the interprocedural register +/// allocation infrastructure. +/// +/// This pass iterates through MachineInstrs in a given MachineFunction and at +/// each callsite queries RegisterUsageInfo for the RegMask (calculated based +/// on the actual register allocation) of the callee function. If the RegMask +/// detail is available, this pass updates the RegMask of the call instruction. +/// This updated RegMask will be used by the register allocator while allocating +/// the current MachineFunction.
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterUsageInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include <map> +#include <string> + +namespace llvm { +void initializeRegUsageInfoPropagationPassPass(PassRegistry &); +} + +using namespace llvm; + +#define DEBUG_TYPE "ip-regalloc" + +#define RUIP_NAME "Register Usage Information Propagation" + +namespace { +class RegUsageInfoPropagationPass : public MachineFunctionPass { + +public: + RegUsageInfoPropagationPass() : MachineFunctionPass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeRegUsageInfoPropagationPassPass(Registry); + } + + const char *getPassName() const override { return RUIP_NAME; } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + static char ID; + +private: + static void setRegMask(MachineInstr &MI, const uint32_t *RegMask) { + for (MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) + MO.setRegMask(RegMask); + } + } +}; +} // end of anonymous namespace +char RegUsageInfoPropagationPass::ID = 0; + +INITIALIZE_PASS_BEGIN(RegUsageInfoPropagationPass, "reg-usage-propagation", + RUIP_NAME, false, false) +INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo) +INITIALIZE_PASS_END(RegUsageInfoPropagationPass, "reg-usage-propagation", + RUIP_NAME, false, false) + +FunctionPass *llvm::createRegUsageInfoPropPass() { + return new RegUsageInfoPropagationPass(); +} + +void RegUsageInfoPropagationPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<PhysicalRegisterUsageInfo>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) { + const Module *M = MF.getFunction()->getParent(); + PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); + + DEBUG(dbgs() << " ++++++++++++++++++++ " << getPassName() + << " ++++++++++++++++++++ \n"); + DEBUG(dbgs() << "MachineFunction : " << MF.getName() << "\n"); + + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (!MI.isCall()) + continue; + DEBUG(dbgs() + << "Call Instruction Before Register Usage Info Propagation : \n"); + DEBUG(dbgs() << MI << "\n"); + + auto UpdateRegMask = [&](const Function *F) { + const auto *RegMask = PRUI->getRegUsageInfo(F); + if (!RegMask) + return; + setRegMask(MI, &(*RegMask)[0]); + Changed = true; + }; + + MachineOperand &Operand = MI.getOperand(0); + if (Operand.isGlobal()) + UpdateRegMask(cast<Function>(Operand.getGlobal())); + else if (Operand.isSymbol()) + UpdateRegMask(M->getFunction(Operand.getSymbolName())); + + DEBUG(dbgs() + << "Call Instruction After Register Usage Info Propagation : \n"); + DEBUG(dbgs() << MI << "\n"); + } + } + + DEBUG(dbgs() << " +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + "++++++ \n"); + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index c1ff13e..617ece9 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ 
b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -203,6 +203,16 @@ namespace { /// make sure to set it to the correct physical subregister. void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); + /// If the given machine operand reads only undefined lanes add an undef + /// flag. + /// This can happen when undef uses were previously concealed by a copy + /// which we coalesced. Example: + /// %vreg0:sub0<def,read-undef> = ... + /// %vreg1 = COPY %vreg0 <-- Coalescing COPY reveals undef + /// = use %vreg1:sub1 <-- hidden undef use + void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, + MachineOperand &MO, unsigned SubRegIdx); + /// Handle copies of undef values. /// Returns true if @p CopyMI was a copy of an undef value and eliminated. bool eliminateUndefCopy(MachineInstr *CopyMI); @@ -467,7 +477,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); // We have a non-trivially-coalescable copy with IntA being the source and // IntB being the dest, thus this defines a value number in IntB. If the @@ -642,7 +652,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); assert(BValNo != nullptr && BValNo->def == CopyIdx); @@ -674,7 +684,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3, // op#2<->op#3) of commute transformation should be considered/tried here. unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex; - if (!TII->findCommutedOpIndices(DefMI, UseOpIdx, NewDstIdx)) + if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx)) return false; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); @@ -692,7 +702,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) { MachineInstr *UseMI = MO.getParent(); unsigned OpNo = &MO - &UseMI->getOperand(0); - SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); + SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI); LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); if (US == IntA.end() || US->valno != AValNo) continue; @@ -708,7 +718,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // transformation. Start by commuting the instruction. 
MachineBasicBlock *MBB = DefMI->getParent(); MachineInstr *NewMI = - TII->commuteInstruction(DefMI, false, UseOpIdx, NewDstIdx); + TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return false; if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && @@ -716,7 +726,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) return false; if (NewMI != DefMI) { - LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); + LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI); MachineBasicBlock::iterator Pos = DefMI; MBB->insert(Pos, NewMI); MBB->erase(DefMI); @@ -746,7 +756,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, UseMO.setReg(NewReg); continue; } - SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); + SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true); LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); assert(US != IntA.end() && "Use must be live"); if (US->valno != AValNo) @@ -784,7 +794,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, } ErasedInstrs.insert(UseMI); - LIS->RemoveMachineInstrFromMaps(UseMI); + LIS->RemoveMachineInstrFromMaps(*UseMI); UseMI->eraseFromParent(); } @@ -879,7 +889,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, return false; LiveInterval &SrcInt = LIS->getInterval(SrcReg); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); + SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn(); assert(ValNo && "CopyMI input register not live"); if (ValNo->isPHIDef() || ValNo->isUnused()) @@ -891,9 +901,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, IsDefCopy = true; return false; } - if (!TII->isAsCheapAsAMove(DefMI)) + if (!TII->isAsCheapAsAMove(*DefMI)) return false; - if (!TII->isTriviallyReMaterializable(DefMI, AA)) + if (!TII->isTriviallyReMaterializable(*DefMI, AA)) return false; if (!definesFullReg(*DefMI, SrcReg)) return false; @@ -939,11 +949,13 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } } + DebugLoc DL = CopyMI->getDebugLoc(); MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = std::next(MachineBasicBlock::iterator(CopyMI)); - TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI); - MachineInstr *NewMI = std::prev(MII); + TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI); + MachineInstr &NewMI = *std::prev(MII); + NewMI.setDebugLoc(DL); // In a situation like the following: // %vreg0:subreg = instr ; DefMI, subreg = DstIdx @@ -952,7 +964,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // %vreg1 = instr const TargetRegisterClass *NewRC = CP.getNewRC(); if (DstIdx != 0) { - MachineOperand &DefMO = NewMI->getOperand(0); + MachineOperand &DefMO = NewMI.getOperand(0); if (DefMO.getSubReg() == DstIdx) { assert(SrcIdx == 0 && CP.isFlipped() && "Shouldn't have SrcIdx+DstIdx at this point"); @@ -967,7 +979,24 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } } - LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); + // CopyMI may have implicit operands, save them so that we can transfer them + // over to the newly materialized instruction after CopyMI is removed. 
+ SmallVector<MachineOperand, 4> ImplicitOps; + ImplicitOps.reserve(CopyMI->getNumOperands() - + CopyMI->getDesc().getNumOperands()); + for (unsigned I = CopyMI->getDesc().getNumOperands(), + E = CopyMI->getNumOperands(); + I != E; ++I) { + MachineOperand &MO = CopyMI->getOperand(I); + if (MO.isReg()) { + assert(MO.isImplicit() && "No explicit operands after implicit operands."); + // Discard VReg implicit defs. + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + ImplicitOps.push_back(MO); + } + } + + LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI); CopyMI->eraseFromParent(); ErasedInstrs.insert(CopyMI); @@ -975,9 +1004,10 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // We need to remember these so we can add intervals once we insert // NewMI into SlotIndexes. SmallVector<unsigned, 4> NewMIImplDefs; - for (unsigned i = NewMI->getDesc().getNumOperands(), - e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); + for (unsigned i = NewMI.getDesc().getNumOperands(), + e = NewMI.getNumOperands(); + i != e; ++i) { + MachineOperand &MO = NewMI.getOperand(i); if (MO.isReg() && MO.isDef()) { assert(MO.isImplicit() && MO.isDead() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())); @@ -986,7 +1016,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - unsigned NewIdx = NewMI->getOperand(0).getSubReg(); + unsigned NewIdx = NewMI.getOperand(0).getSubReg(); if (DefRC != nullptr) { if (NewIdx) @@ -995,20 +1025,54 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, NewRC = TRI->getCommonSubClass(NewRC, DefRC); assert(NewRC && "subreg chosen for remat incompatible with instruction"); } + // Remap subranges to new lanemask and change register class. + LiveInterval &DstInt = LIS->getInterval(DstReg); + for (LiveInterval::SubRange &SR : DstInt.subranges()) { + SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask); + } MRI->setRegClass(DstReg, NewRC); + // Update machine operands and add flags. updateRegDefsUses(DstReg, DstReg, DstIdx); - NewMI->getOperand(0).setSubReg(NewIdx); + NewMI.getOperand(0).setSubReg(NewIdx); - } else if (NewMI->getOperand(0).getReg() != CopyDstReg) { + // Add dead subregister definitions if we are defining the whole register + // but only part of it is live. + // This could happen if the rematerialization instruction is rematerializing + // more than is actually used in the register. + // An example would be: + // vreg1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs + // ; Copying only part of the register here, but the rest is undef. + // vreg2:sub_16bit<def, read-undef> = COPY vreg1:sub_16bit + // ==> + // ; Materialize all the constants but only using one + // vreg2 = LOAD_CONSTANTS 5, 8 + // + // At this point, for the part that wasn't defined before, we could have + // subranges missing the definition.
+ if (NewIdx == 0 && DstInt.hasSubRanges()) { + SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); + SlotIndex DefIndex = + CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); + LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg); + VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator(); + for (LiveInterval::SubRange &SR : DstInt.subranges()) { + if (!SR.liveAt(DefIndex)) + SR.createDeadDef(DefIndex, Alloc); + MaxMask &= ~SR.LaneMask; + } + if (MaxMask != 0) { + LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask); + SR->createDeadDef(DefIndex, Alloc); + } + } + } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && "Only expect virtual or physical registers in remat"); - NewMI->getOperand(0).setIsDead(true); - NewMI->addOperand(MachineOperand::CreateReg(CopyDstReg, - true /*IsDef*/, - true /*IsImp*/, - false /*IsKill*/)); + NewMI.getOperand(0).setIsDead(true); + NewMI.addOperand(MachineOperand::CreateReg( + CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/)); // Record small dead def live-ranges for all the subregisters // of the destination register. // Otherwise, variables that live through may miss some @@ -1026,28 +1090,18 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // no live-ranges would have been created for ECX. // Fix that! SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); - for (MCRegUnitIterator Units(NewMI->getOperand(0).getReg(), TRI); + for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI); Units.isValid(); ++Units) if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } - if (NewMI->getOperand(0).getSubReg()) - NewMI->getOperand(0).setIsUndef(); + if (NewMI.getOperand(0).getSubReg()) + NewMI.getOperand(0).setIsUndef(); - // CopyMI may have implicit operands, transfer them over to the newly - // rematerialized instruction. And update implicit def interval valnos. - for (unsigned i = CopyMI->getDesc().getNumOperands(), - e = CopyMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = CopyMI->getOperand(i); - if (MO.isReg()) { - assert(MO.isImplicit() && "No explicit operands after implict operands."); - // Discard VReg implicit defs. - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { - NewMI->addOperand(MO); - } - } - } + // Transfer over implicit operands to the rematerialized instruction. + for (MachineOperand &MO : ImplicitOps) + NewMI.addOperand(MO); SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { @@ -1057,7 +1111,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } - DEBUG(dbgs() << "Remat: " << *NewMI); + DEBUG(dbgs() << "Remat: " << NewMI); ++NumReMats; // The source interval can become smaller because we removed a use. @@ -1093,7 +1147,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); - SlotIndex Idx = LIS->getInstructionIndex(CopyMI); + SlotIndex Idx = LIS->getInstructionIndex(*CopyMI); const LiveInterval &SrcLI = LIS->getInterval(SrcReg); // CopyMI is undef iff SrcReg is not live before the instruction. 
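The subrange bookkeeping above reduces to plain mask arithmetic: each subrange that is live (or just got a dead def) clears its lanes from the register's maximal mask, and any lanes left over need a fresh subrange. A self-contained illustration with invented masks (LaneBitmask is a plain unsigned in this revision):

#include <cassert>
typedef unsigned LaneBitmask;

int main() {
  LaneBitmask MaxMask = 0x7; // all lanes of the vreg, e.g. sub0|sub1|sub2
  const LaneBitmask SubRangeMasks[] = {0x1, 0x2}; // lanes already tracked
  LaneBitmask Covered = 0;
  for (LaneBitmask SRMask : SubRangeMasks)
    Covered |= SRMask;
  LaneBitmask Missing = MaxMask & ~Covered;
  assert(Missing == 0x4); // only these lanes still need a dead def subrange
  return 0;
}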
if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { @@ -1136,7 +1190,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { if (MO.isDef() /*|| MO.isUndef()*/) continue; const MachineInstr &MI = *MO.getParent(); - SlotIndex UseIdx = LIS->getInstructionIndex(&MI); + SlotIndex UseIdx = LIS->getInstructionIndex(MI); LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); bool isLive; if (UseMask != ~0u && DstLI.hasSubRanges()) { @@ -1159,12 +1213,51 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { return true; } +void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, + MachineOperand &MO, unsigned SubRegIdx) { + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx); + if (MO.isDef()) + Mask = ~Mask; + bool IsUndef = true; + for (const LiveInterval::SubRange &S : Int.subranges()) { + if ((S.LaneMask & Mask) == 0) + continue; + if (S.liveAt(UseIdx)) { + IsUndef = false; + break; + } + } + if (IsUndef) { + MO.setIsUndef(true); + // We found out some subregister use is actually reading an undefined + // value. In some cases the whole vreg has become undefined at this + // point so we have to potentially shrink the main range if the + // use was ending a live segment there. + LiveQueryResult Q = Int.Query(UseIdx); + if (Q.valueOut() == nullptr) + ShrinkMainRange = true; + } +} + void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); + if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { + for (MachineOperand &MO : MRI->reg_operands(DstReg)) { + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0 || MO.isUndef()) + continue; + MachineInstr &MI = *MO.getParent(); + if (MI.isDebugValue()) + continue; + SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true); + addUndefFlag(*DstInt, UseIdx, MO, SubReg); + } + } + SmallPtrSet<MachineInstr*, 8> Visited; for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); @@ -1186,7 +1279,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, // If SrcReg wasn't read, it may still be the case that DstReg is live-in // because SrcReg is a sub-register. if (DstInt && !Reads && SubIdx) - Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI)); + Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); // Replace SrcReg with DstReg in all UseMI operands. for (unsigned i = 0, e = Ops.size(); i != e; ++i) { @@ -1206,30 +1299,11 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); } - LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx); - bool IsUndef = true; SlotIndex MIIdx = UseMI->isDebugValue() - ? LIS->getSlotIndexes()->getIndexBefore(UseMI) - : LIS->getInstructionIndex(UseMI); + ? LIS->getSlotIndexes()->getIndexBefore(*UseMI) + : LIS->getInstructionIndex(*UseMI); SlotIndex UseIdx = MIIdx.getRegSlot(true); - for (LiveInterval::SubRange &S : DstInt->subranges()) { - if ((S.LaneMask & Mask) == 0) - continue; - if (S.liveAt(UseIdx)) { - IsUndef = false; - break; - } - } - if (IsUndef) { - MO.setIsUndef(true); - // We found out some subregister use is actually reading an undefined - // value. 
In some cases the whole vreg has become undefined at this - // point so we have to potentially shrink the main range if the - // use was ending a live segment there. - LiveQueryResult Q = DstInt->Query(MIIdx); - if (Q.valueOut() == nullptr) - ShrinkMainRange = true; - } + addUndefFlag(*DstInt, UseIdx, MO, SubIdx); } if (DstIsPhys) @@ -1241,7 +1315,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugValue()) - dbgs() << LIS->getInstructionIndex(UseMI) << "\t"; + dbgs() << LIS->getInstructionIndex(*UseMI) << "\t"; dbgs() << *UseMI; }); } @@ -1267,7 +1341,7 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { Again = false; - DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI); + DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); CoalescerPair CP(*TRI); if (!CP.setRegisters(CopyMI)) { @@ -1303,7 +1377,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // Eliminate undefs. if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) { - LIS->RemoveMachineInstrFromMaps(CopyMI); + LIS->RemoveMachineInstrFromMaps(*CopyMI); CopyMI->eraseFromParent(); return false; // Not coalescable. } @@ -1314,7 +1388,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (CP.getSrcReg() == CP.getDstReg()) { LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); - const SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); + const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); LiveQueryResult LRQ = LI.Query(CopyIdx); if (VNInfo *DefVNI = LRQ.valueDefined()) { VNInfo *ReadVNI = LRQ.valueIn(); @@ -1332,7 +1406,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { } DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); } - LIS->RemoveMachineInstrFromMaps(CopyMI); + LIS->RemoveMachineInstrFromMaps(*CopyMI); CopyMI->eraseFromParent(); return true; } @@ -1393,7 +1467,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (!CP.isPartial() && !CP.isPhys()) { if (adjustCopiesBackFrom(CP, CopyMI) || removeCopyByCommutingDef(CP, CopyMI)) { - LIS->RemoveMachineInstrFromMaps(CopyMI); + LIS->RemoveMachineInstrFromMaps(*CopyMI); CopyMI->eraseFromParent(); DEBUG(dbgs() << "\tTrivial!\n"); return true; @@ -1507,8 +1581,8 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { MachineInstr *DestMI = MRI->getVRegDef(RHS.reg); CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg); - const SlotIndex CopyRegIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); - const SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); + const SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); + const SlotIndex DestRegIdx = LIS->getInstructionIndex(*DestMI).getRegSlot(); // We checked above that there are no interfering defs of the physical // register. However, for this case, where we intent to move up the def of @@ -1544,7 +1618,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { } } - LIS->RemoveMachineInstrFromMaps(CopyMI); + LIS->RemoveMachineInstrFromMaps(*CopyMI); CopyMI->eraseFromParent(); // We don't track kills for reserved registers. @@ -1775,7 +1849,7 @@ class JoinVals { /// Return true if MI uses any of the given Lanes from Reg. /// This does not include partial redefinitions of Reg. 
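The addUndefFlag helper above encodes one rule: a subregister use reads an undefined value exactly when no subrange overlapping its lanes is live at the use index (for defs the mask is inverted, since the def's own lanes need not be live beforehand). A standalone demonstration with invented lane data:

#include <cassert>
typedef unsigned LaneBitmask;

int main() {
  struct { LaneBitmask Mask; bool LiveAtUse; } SubRanges[] = {
    {0x1, false}, // sub0: no live value at UseIdx
    {0x2, true},  // sub1: live at UseIdx
  };
  LaneBitmask UseMask = 0x1; // the operand reads sub0 only
  bool IsUndef = true;
  for (const auto &S : SubRanges) {
    if ((S.Mask & UseMask) == 0)
      continue; // subrange tracks unrelated lanes
    if (S.LiveAtUse) {
      IsUndef = false; // some queried lane carries a defined value
      break;
    }
  }
  assert(IsUndef); // sub0 is dead here, so the operand gets an <undef> flag
  return 0;
}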
- bool usesLanes(const MachineInstr *MI, unsigned, unsigned, LaneBitmask) const; + bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const; /// Determine if ValNo is a copy of a value number in LR or Other.LR that will /// be pruned: @@ -2025,7 +2099,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // IMPLICIT_DEF instructions behind, and there is nothing wrong with it // technically. // - // WHen it happens, treat that IMPLICIT_DEF as a normal value, and don't try + // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try // to erase the IMPLICIT_DEF instruction. if (OtherV.ErasableImplicitDef && DefMI && DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { @@ -2219,11 +2293,11 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, return true; } -bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, +bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx, LaneBitmask Lanes) const { - if (MI->isDebugValue()) + if (MI.isDebugValue()) return false; - for (const MachineOperand &MO : MI->operands()) { + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg) continue; if (!MO.readsReg()) @@ -2278,7 +2352,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { unsigned TaintNum = 0; for(;;) { assert(MI != MBB->end() && "Bad LastMI"); - if (usesLanes(MI, Other.Reg, Other.SubIdx, TaintedLanes)) { + if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) { DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); return false; } @@ -2457,7 +2531,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, } ErasedInstrs.insert(MI); DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); - LIS->RemoveMachineInstrFromMaps(MI); + LIS->RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); break; } @@ -2838,16 +2912,15 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { } else { SmallVector<MachineInstr*, 2> Terminals; - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E; ++MII) - if (MII->isCopyLike()) { - if (applyTerminalRule(*MII)) - Terminals.push_back(&(*MII)); + for (MachineInstr &MII : *MBB) + if (MII.isCopyLike()) { + if (applyTerminalRule(MII)) + Terminals.push_back(&MII); else - WorkList.push_back(MII); - } - // Append the copies evicted by the terminal rule at the end of the list. - WorkList.append(Terminals.begin(), Terminals.end()); + WorkList.push_back(&MII); + } + // Append the copies evicted by the terminal rule at the end of the list. + WorkList.append(Terminals.begin(), Terminals.end()); } // Try coalescing the collected copies immediately, and remove the nulls. // This prevents the WorkList from getting too large since most copies are diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp index f33dc3e..a21d6c1 100644 --- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -24,7 +24,13 @@ using namespace llvm; /// Increase pressure for each pressure set provided by TargetRegisterInfo. 
static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, - PSetIterator PSetI) { + const MachineRegisterInfo &MRI, unsigned Reg, + LaneBitmask PrevMask, LaneBitmask NewMask) { + assert((PrevMask & ~NewMask) == 0 && "Must not remove bits"); + if (PrevMask != 0 || NewMask == 0) + return; + + PSetIterator PSetI = MRI.getPressureSets(Reg); unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) CurrSetPressure[*PSetI] += Weight; @@ -32,7 +38,13 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, /// Decrease pressure for each pressure set provided by TargetRegisterInfo. static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, - PSetIterator PSetI) { + const MachineRegisterInfo &MRI, unsigned Reg, + LaneBitmask PrevMask, LaneBitmask NewMask) { + assert((NewMask & ~PrevMask) == 0 && "Must not add bits"); + if (NewMask != 0 || PrevMask == 0) + return; + + PSetIterator PSetI = MRI.getPressureSets(Reg); unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) { assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow"); @@ -59,12 +71,20 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Max Pressure: "; dumpRegSetPressure(MaxSetPressure, TRI); dbgs() << "Live In: "; - for (unsigned Reg : LiveInRegs) - dbgs() << PrintVRegOrUnit(Reg, TRI) << " "; + for (const RegisterMaskPair &P : LiveInRegs) { + dbgs() << PrintVRegOrUnit(P.RegUnit, TRI); + if (P.LaneMask != ~0u) + dbgs() << ':' << PrintLaneMask(P.LaneMask); + dbgs() << ' '; + } dbgs() << '\n'; dbgs() << "Live Out: "; - for (unsigned Reg : LiveOutRegs) - dbgs() << PrintVRegOrUnit(Reg, TRI) << " "; + for (const RegisterMaskPair &P : LiveOutRegs) { + dbgs() << PrintVRegOrUnit(P.RegUnit, TRI); + if (P.LaneMask != ~0u) + dbgs() << ':' << PrintLaneMask(P.LaneMask); + dbgs() << ' '; + } dbgs() << '\n'; } @@ -89,24 +109,25 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const { dbgs() << '\n'; } -/// Increase the current pressure as impacted by these registers and bump -/// the high water mark if needed. -void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) { - for (unsigned RegUnit : RegUnits) { - PSetIterator PSetI = MRI->getPressureSets(RegUnit); - unsigned Weight = PSetI.getWeight(); - for (; PSetI.isValid(); ++PSetI) { - CurrSetPressure[*PSetI] += Weight; - P.MaxSetPressure[*PSetI] = - std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]); - } +void RegPressureTracker::increaseRegPressure(unsigned RegUnit, + LaneBitmask PreviousMask, + LaneBitmask NewMask) { + if (PreviousMask != 0 || NewMask == 0) + return; + + PSetIterator PSetI = MRI->getPressureSets(RegUnit); + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + CurrSetPressure[*PSetI] += Weight; + P.MaxSetPressure[*PSetI] = + std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]); } } -/// Simply decrease the current pressure as impacted by these registers. -void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> RegUnits) { - for (unsigned RegUnit : RegUnits) - decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnit)); +void RegPressureTracker::decreaseRegPressure(unsigned RegUnit, + LaneBitmask PreviousMask, + LaneBitmask NewMask) { + decreaseSetPressure(CurrSetPressure, *MRI, RegUnit, PreviousMask, NewMask); } /// Clear the result so it can be used for another round of pressure tracking.
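Both rewritten helpers share the same guard: set pressure counts whole registers, so a lane-mask transition only matters when the live-lane set goes from empty to non-empty or back. A tiny self-contained check of that rule (mask values invented):

#include <cassert>
typedef unsigned LaneBitmask;

static bool increasesPressure(LaneBitmask Prev, LaneBitmask New) {
  return Prev == 0 && New != 0; // register becomes live
}
static bool decreasesPressure(LaneBitmask Prev, LaneBitmask New) {
  return Prev != 0 && New == 0; // last live lanes die
}

int main() {
  assert(increasesPressure(0x0, 0x1));  // first lane goes live
  assert(!increasesPressure(0x1, 0x3)); // extra lane, register already counted
  assert(decreasesPressure(0x3, 0x0));  // all lanes die
  assert(!decreasesPressure(0x3, 0x1)); // partial kill, register stays live
  return 0;
}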
@@ -201,8 +222,7 @@ void RegPressureTracker::init(const MachineFunction *mf, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, - bool ShouldTrackUntiedDefs) -{ + bool TrackLaneMasks, bool TrackUntiedDefs) { reset(); MF = mf; @@ -210,7 +230,8 @@ void RegPressureTracker::init(const MachineFunction *mf, RCI = rci; MRI = &MF->getRegInfo(); MBB = mbb; - TrackUntiedDefs = ShouldTrackUntiedDefs; + this->TrackUntiedDefs = TrackUntiedDefs; + this->TrackLaneMasks = TrackLaneMasks; if (RequireIntervals) { assert(lis && "IntervalPressure requires LiveIntervals"); @@ -250,7 +271,7 @@ SlotIndex RegPressureTracker::getCurrSlot() const { ++IdxPos; if (IdxPos == MBB->end()) return LIS->getMBBEndIdx(MBB); - return LIS->getInstructionIndex(IdxPos).getRegSlot(); + return LIS->getInstructionIndex(*IdxPos).getRegSlot(); } /// Set the boundary for the top of the region and summarize live ins. @@ -297,20 +318,106 @@ void RegPressureTracker::closeRegion() { void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); assert(isBottomClosed() && "need bottom-up tracking to intialize."); - for (unsigned Reg : P.LiveOutRegs) { - if (TargetRegisterInfo::isVirtualRegister(Reg) - && !RPTracker.hasUntiedDef(Reg)) { - increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg)); + for (const RegisterMaskPair &Pair : P.LiveOutRegs) { + unsigned RegUnit = Pair.RegUnit; + if (TargetRegisterInfo::isVirtualRegister(RegUnit) + && !RPTracker.hasUntiedDef(RegUnit)) + increaseSetPressure(LiveThruPressure, *MRI, RegUnit, 0, Pair.LaneMask); + } +} + +static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits, + unsigned RegUnit) { + auto I = std::find_if(RegUnits.begin(), RegUnits.end(), + [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); + if (I == RegUnits.end()) + return 0; + return I->LaneMask; +} + +static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, + RegisterMaskPair Pair) { + unsigned RegUnit = Pair.RegUnit; + assert(Pair.LaneMask != 0); + auto I = std::find_if(RegUnits.begin(), RegUnits.end(), + [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); + if (I == RegUnits.end()) { + RegUnits.push_back(Pair); + } else { + I->LaneMask |= Pair.LaneMask; + } +} + +static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits, + unsigned RegUnit) { + auto I = std::find_if(RegUnits.begin(), RegUnits.end(), + [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); + if (I == RegUnits.end()) { + RegUnits.push_back(RegisterMaskPair(RegUnit, 0)); + } else { + I->LaneMask = 0; + } +} + +static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits, + RegisterMaskPair Pair) { + unsigned RegUnit = Pair.RegUnit; + assert(Pair.LaneMask != 0); + auto I = std::find_if(RegUnits.begin(), RegUnits.end(), + [RegUnit](const RegisterMaskPair Other) { + return Other.RegUnit == RegUnit; + }); + if (I != RegUnits.end()) { + I->LaneMask &= ~Pair.LaneMask; + if (I->LaneMask == 0) + RegUnits.erase(I); + } +} + +static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit, + SlotIndex Pos, LaneBitmask SafeDefault, + bool(*Property)(const LiveRange &LR, SlotIndex Pos)) { + if (TargetRegisterInfo::isVirtualRegister(RegUnit)) { + const LiveInterval &LI = LIS.getInterval(RegUnit); + LaneBitmask Result = 0; + if (TrackLaneMasks && 
LI.hasSubRanges()) { + for (const LiveInterval::SubRange &SR : LI.subranges()) { + if (Property(SR, Pos)) + Result |= SR.LaneMask; + } + } else if (Property(LI, Pos)) { + Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit) : ~0u; } + + return Result; + } else { + const LiveRange *LR = LIS.getCachedRegUnit(RegUnit); + // Be prepared for missing liveranges: We usually do not compute liveranges + // for physical registers on targets with many registers (GPUs). + if (LR == nullptr) + return SafeDefault; + return Property(*LR, Pos) ? ~0u : 0; } } -/// \brief Convenient wrapper for checking membership in RegisterOperands. -/// (std::count() doesn't have an early exit). -static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) { - return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end(); +static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, + bool TrackLaneMasks, unsigned RegUnit, + SlotIndex Pos) { + return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, ~0u, + [](const LiveRange &LR, SlotIndex Pos) { + return LR.liveAt(Pos); + }); } + namespace { /// Collect this instruction's unique uses and defs into SmallVectors for @@ -325,19 +432,25 @@ class RegisterOperandsCollector { RegisterOperandsCollector(RegisterOperands &RegOpers, const TargetRegisterInfo &TRI, - const MachineRegisterInfo &MRI, - bool IgnoreDead) + const MachineRegisterInfo &MRI, bool IgnoreDead) : RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {} void collectInstr(const MachineInstr &MI) const { - for (ConstMIBundleOperands OperI(&MI); OperI.isValid(); ++OperI) + for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) collectOperand(*OperI); // Remove redundant physreg dead defs. - SmallVectorImpl<unsigned>::iterator I = - std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(), - std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs)); - RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); + for (const RegisterMaskPair &P : RegOpers.Defs) + removeRegLanes(RegOpers.DeadDefs, P); + } + + void collectInstrLanes(const MachineInstr &MI) const { + for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) + collectOperandLanes(*OperI); + + // Remove redundant physreg dead defs. + for (const RegisterMaskPair &P : RegOpers.Defs) + removeRegLanes(RegOpers.DeadDefs, P); } /// Push this operand's register onto the correct vectors. @@ -345,28 +458,65 @@ class RegisterOperandsCollector { if (!MO.isReg() || !MO.getReg()) return; unsigned Reg = MO.getReg(); - if (MO.readsReg()) - pushRegUnits(Reg, RegOpers.Uses); - if (MO.isDef()) { + if (MO.isUse()) { + if (!MO.isUndef() && !MO.isInternalRead()) + pushReg(Reg, RegOpers.Uses); + } else { + assert(MO.isDef()); + // Subregister definitions may imply a register read. 
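The addRegLanes/removeRegLanes/setRegZero helpers introduced a little earlier keep at most one RegisterMaskPair per register: masks are OR'd into an existing entry, and the entry disappears once its mask drains to zero. A self-contained sketch of that merge discipline (std::vector standing in for SmallVector):

#include <cassert>
#include <vector>
typedef unsigned LaneBitmask;
struct RegisterMaskPair { unsigned RegUnit; LaneBitmask LaneMask; };

int main() {
  std::vector<RegisterMaskPair> Units;
  auto add = [&Units](unsigned R, LaneBitmask M) {
    for (RegisterMaskPair &P : Units)
      if (P.RegUnit == R) { P.LaneMask |= M; return; } // merge into entry
    Units.push_back({R, M}); // first sighting of this register
  };
  auto remove = [&Units](unsigned R, LaneBitmask M) {
    for (std::vector<RegisterMaskPair>::iterator I = Units.begin(),
         E = Units.end(); I != E; ++I)
      if (I->RegUnit == R) {
        I->LaneMask &= ~M;
        if (I->LaneMask == 0)
          Units.erase(I); // no lanes left, drop the entry
        return;
      }
  };
  add(5, 0x1);
  add(5, 0x2); // merged: one entry {5, 0x3}
  assert(Units.size() == 1 && Units[0].LaneMask == 0x3);
  remove(5, 0x3);
  assert(Units.empty());
  return 0;
}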
+ if (MO.readsReg()) + pushReg(Reg, RegOpers.Uses); + if (MO.isDead()) { if (!IgnoreDead) - pushRegUnits(Reg, RegOpers.DeadDefs); + pushReg(Reg, RegOpers.DeadDefs); } else - pushRegUnits(Reg, RegOpers.Defs); + pushReg(Reg, RegOpers.Defs); } } - void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) const { + void pushReg(unsigned Reg, + SmallVectorImpl<RegisterMaskPair> &RegUnits) const { if (TargetRegisterInfo::isVirtualRegister(Reg)) { - if (containsReg(RegUnits, Reg)) - return; - RegUnits.push_back(Reg); + addRegLanes(RegUnits, RegisterMaskPair(Reg, ~0u)); } else if (MRI.isAllocatable(Reg)) { - for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { - if (containsReg(RegUnits, *Units)) - continue; - RegUnits.push_back(*Units); - } + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u)); + } + } + + void collectOperandLanes(const MachineOperand &MO) const { + if (!MO.isReg() || !MO.getReg()) + return; + unsigned Reg = MO.getReg(); + unsigned SubRegIdx = MO.getSubReg(); + if (MO.isUse()) { + if (!MO.isUndef() && !MO.isInternalRead()) + pushRegLanes(Reg, SubRegIdx, RegOpers.Uses); + } else { + assert(MO.isDef()); + // Treat read-undef subreg defs as definitions of the whole register. + if (MO.isUndef()) + SubRegIdx = 0; + + if (MO.isDead()) { + if (!IgnoreDead) + pushRegLanes(Reg, SubRegIdx, RegOpers.DeadDefs); + } else + pushRegLanes(Reg, SubRegIdx, RegOpers.Defs); + } + } + + void pushRegLanes(unsigned Reg, unsigned SubRegIdx, + SmallVectorImpl<RegisterMaskPair> &RegUnits) const { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + LaneBitmask LaneMask = SubRegIdx != 0 + ? TRI.getSubRegIndexLaneMask(SubRegIdx) + : MRI.getMaxLaneMaskForVReg(Reg); + addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask)); + } else if (MRI.isAllocatable(Reg)) { + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u)); } } @@ -378,24 +528,26 @@ class RegisterOperandsCollector { void RegisterOperands::collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, - bool IgnoreDead) { + bool TrackLaneMasks, bool IgnoreDead) { RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead); - Collector.collectInstr(MI); + if (TrackLaneMasks) + Collector.collectInstrLanes(MI); + else + Collector.collectInstr(MI); } void RegisterOperands::detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS) { - SlotIndex SlotIdx = LIS.getInstructionIndex(&MI); - for (SmallVectorImpl<unsigned>::iterator RI = Defs.begin(); - RI != Defs.end(); /*empty*/) { - unsigned Reg = *RI; + SlotIndex SlotIdx = LIS.getInstructionIndex(MI); + for (auto RI = Defs.begin(); RI != Defs.end(); /*empty*/) { + unsigned Reg = RI->RegUnit; const LiveRange *LR = getLiveRange(LIS, Reg); if (LR != nullptr) { LiveQueryResult LRQ = LR->Query(SlotIdx); if (LRQ.isDeadDef()) { // LiveIntervals knows this is a dead def even though its MachineOperand is // not flagged as such.
- DeadDefs.push_back(Reg); + DeadDefs.push_back(*RI); RI = Defs.erase(RI); continue; } @@ -404,6 +556,52 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI, } } +void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS, + const MachineRegisterInfo &MRI, + SlotIndex Pos, + MachineInstr *AddFlagsMI) { + for (auto I = Defs.begin(); I != Defs.end(); ) { + LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, I->RegUnit, + Pos.getDeadSlot()); + // If the def is all that is live after the instruction, then in case + // of a subregister def we need a read-undef flag. + unsigned RegUnit = I->RegUnit; + if (TargetRegisterInfo::isVirtualRegister(RegUnit) && + AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask) == 0) + AddFlagsMI->setRegisterDefReadUndef(RegUnit); + + LaneBitmask ActualDef = I->LaneMask & LiveAfter; + if (ActualDef == 0) { + I = Defs.erase(I); + } else { + I->LaneMask = ActualDef; + ++I; + } + } + for (auto I = Uses.begin(); I != Uses.end(); ) { + LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit, + Pos.getBaseIndex()); + LaneBitmask LaneMask = I->LaneMask & LiveBefore; + if (LaneMask == 0) { + I = Uses.erase(I); + } else { + I->LaneMask = LaneMask; + ++I; + } + } + if (AddFlagsMI != nullptr) { + for (const RegisterMaskPair &P : DeadDefs) { + unsigned RegUnit = P.RegUnit; + if (!TargetRegisterInfo::isVirtualRegister(RegUnit)) + continue; + LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit, + Pos.getDeadSlot()); + if (LiveAfter == 0) + AddFlagsMI->setRegisterDefReadUndef(RegUnit); + } + } +} + /// Initialize an array of N PressureDiffs. void PressureDiffs::init(unsigned N) { Size = N; @@ -421,11 +619,11 @@ void PressureDiffs::addInstruction(unsigned Idx, const MachineRegisterInfo &MRI) { PressureDiff &PDiff = (*this)[Idx]; assert(!PDiff.begin()->isValid() && "stale PDiff"); - for (unsigned Reg : RegOpers.Defs) - PDiff.addPressureChange(Reg, true, &MRI); + for (const RegisterMaskPair &P : RegOpers.Defs) + PDiff.addPressureChange(P.RegUnit, true, &MRI); - for (unsigned Reg : RegOpers.Uses) - PDiff.addPressureChange(Reg, false, &MRI); + for (const RegisterMaskPair &P : RegOpers.Uses) + PDiff.addPressureChange(P.RegUnit, false, &MRI); } /// Add a change in pressure to the pressure diff of a given instruction. @@ -465,33 +663,58 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, } /// Force liveness of registers. -void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) { - for (unsigned Reg : Regs) { - if (LiveRegs.insert(Reg)) - increaseRegPressure(Reg); +void RegPressureTracker::addLiveRegs(ArrayRef<RegisterMaskPair> Regs) { + for (const RegisterMaskPair &P : Regs) { + LaneBitmask PrevMask = LiveRegs.insert(P); + LaneBitmask NewMask = PrevMask | P.LaneMask; + increaseRegPressure(P.RegUnit, PrevMask, NewMask); } } -/// Add Reg to the live in set and increase max pressure.
-void RegPressureTracker::discoverLiveIn(unsigned Reg) { - assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice"); - if (containsReg(P.LiveInRegs, Reg)) - return; +void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair, + SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) { + assert(Pair.LaneMask != 0); + + unsigned RegUnit = Pair.RegUnit; + auto I = std::find_if(LiveInOrOut.begin(), LiveInOrOut.end(), + [RegUnit](const RegisterMaskPair &Other) { + return Other.RegUnit == RegUnit; + }); + LaneBitmask PrevMask; + LaneBitmask NewMask; + if (I == LiveInOrOut.end()) { + PrevMask = 0; + NewMask = Pair.LaneMask; + LiveInOrOut.push_back(Pair); + } else { + PrevMask = I->LaneMask; + NewMask = PrevMask | Pair.LaneMask; + I->LaneMask = NewMask; + } + increaseSetPressure(P.MaxSetPressure, *MRI, RegUnit, PrevMask, NewMask); +} - // At live in discovery, unconditionally increase the high water mark. - P.LiveInRegs.push_back(Reg); - increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); +void RegPressureTracker::discoverLiveIn(RegisterMaskPair Pair) { + discoverLiveInOrOut(Pair, P.LiveInRegs); } -/// Add Reg to the live out set and increase max pressure. -void RegPressureTracker::discoverLiveOut(unsigned Reg) { - assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice"); - if (containsReg(P.LiveOutRegs, Reg)) - return; +void RegPressureTracker::discoverLiveOut(RegisterMaskPair Pair) { + discoverLiveInOrOut(Pair, P.LiveOutRegs); +} - // At live out discovery, unconditionally increase the high water mark. - P.LiveOutRegs.push_back(Reg); - increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); +void RegPressureTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) { + for (const RegisterMaskPair &P : DeadDefs) { + unsigned Reg = P.RegUnit; + LaneBitmask LiveMask = LiveRegs.contains(Reg); + LaneBitmask BumpedMask = LiveMask | P.LaneMask; + increaseRegPressure(Reg, LiveMask, BumpedMask); + } + for (const RegisterMaskPair &P : DeadDefs) { + unsigned Reg = P.RegUnit; + LaneBitmask LiveMask = LiveRegs.contains(Reg); + LaneBitmask BumpedMask = LiveMask | P.LaneMask; + decreaseRegPressure(Reg, BumpedMask, LiveMask); + } } /// Recede across the previous instruction. If LiveUses is provided, record any @@ -500,48 +723,88 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) { /// difference pointer is provided record the changes is pressure caused by this /// instruction independent of liveness. void RegPressureTracker::recede(const RegisterOperands &RegOpers, - SmallVectorImpl<unsigned> *LiveUses) { + SmallVectorImpl<RegisterMaskPair> *LiveUses) { assert(!CurrPos->isDebugValue()); // Boost pressure for all dead defs together. - increaseRegPressure(RegOpers.DeadDefs); - decreaseRegPressure(RegOpers.DeadDefs); + bumpDeadDefs(RegOpers.DeadDefs); // Kill liveness at live defs. // TODO: consider earlyclobbers? - for (unsigned Reg : RegOpers.Defs) { - if (LiveRegs.erase(Reg)) - decreaseRegPressure(Reg); - else - discoverLiveOut(Reg); + for (const RegisterMaskPair &Def : RegOpers.Defs) { + unsigned Reg = Def.RegUnit; + + LaneBitmask PreviousMask = LiveRegs.erase(Def); + LaneBitmask NewMask = PreviousMask & ~Def.LaneMask; + + LaneBitmask LiveOut = Def.LaneMask & ~PreviousMask; + if (LiveOut != 0) { + discoverLiveOut(RegisterMaskPair(Reg, LiveOut)); + // Retroactively model effects on pressure of the live out lanes. 
+ increaseSetPressure(CurrSetPressure, *MRI, Reg, 0, LiveOut); + PreviousMask = LiveOut; + } + + if (NewMask == 0) { + // Add a 0 entry to LiveUses as a marker that the complete vreg has become + // dead. + if (TrackLaneMasks && LiveUses != nullptr) + setRegZero(*LiveUses, Reg); + } + + decreaseRegPressure(Reg, PreviousMask, NewMask); } SlotIndex SlotIdx; if (RequireIntervals) - SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot(); // Generate liveness for uses. - for (unsigned Reg : RegOpers.Uses) { - if (!LiveRegs.contains(Reg)) { - // Adjust liveouts if LiveIntervals are available. - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(*LIS, Reg); - if (LR) { - LiveQueryResult LRQ = LR->Query(SlotIdx); - if (!LRQ.isKill() && !LRQ.valueDefined()) - discoverLiveOut(Reg); + for (const RegisterMaskPair &Use : RegOpers.Uses) { + unsigned Reg = Use.RegUnit; + assert(Use.LaneMask != 0); + LaneBitmask PreviousMask = LiveRegs.insert(Use); + LaneBitmask NewMask = PreviousMask | Use.LaneMask; + if (NewMask == PreviousMask) + continue; + + // Did the register just become live? + if (PreviousMask == 0) { + if (LiveUses != nullptr) { + if (!TrackLaneMasks) { + addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); + } else { + auto I = std::find_if(LiveUses->begin(), LiveUses->end(), + [Reg](const RegisterMaskPair Other) { + return Other.RegUnit == Reg; + }); + bool IsRedef = I != LiveUses->end(); + if (IsRedef) { + // ignore re-defs here... + assert(I->LaneMask == 0); + removeRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); + } else { + addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask)); + } } } - increaseRegPressure(Reg); - LiveRegs.insert(Reg); - if (LiveUses && !containsReg(*LiveUses, Reg)) - LiveUses->push_back(Reg); + + // Discover live outs if this may be the first occurrence of this register. + if (RequireIntervals) { + LaneBitmask LiveOut = getLiveThroughAt(Reg, SlotIdx); + if (LiveOut != 0) + discoverLiveOut(RegisterMaskPair(Reg, LiveOut)); + } } + + increaseRegPressure(Reg, PreviousMask, NewMask); } if (TrackUntiedDefs) { - for (unsigned Reg : RegOpers.Defs) { - if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg)) - UntiedDefs.insert(Reg); + for (const RegisterMaskPair &Def : RegOpers.Defs) { + unsigned RegUnit = Def.RegUnit; + if (TargetRegisterInfo::isVirtualRegister(RegUnit) && + (LiveRegs.contains(RegUnit) & Def.LaneMask) == 0) + UntiedDefs.insert(RegUnit); } } } @@ -562,29 +825,32 @@ void RegPressureTracker::recedeSkipDebugValues() { SlotIndex SlotIdx; if (RequireIntervals) - SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot(); // Open the top of the region using slot indexes. if (RequireIntervals && isTopClosed()) static_cast<IntervalPressure&>(P).openTop(SlotIdx); } -void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses) { +void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) { recedeSkipDebugValues(); const MachineInstr &MI = *CurrPos; RegisterOperands RegOpers; - RegOpers.collect(MI, *TRI, *MRI); - if (RequireIntervals) + RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false); + if (TrackLaneMasks) { + SlotIndex SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot(); + RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); + } else if (RequireIntervals) { RegOpers.detectDeadDefs(MI, *LIS); + } recede(RegOpers, LiveUses); } /// Advance across the current instruction.
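For context on the hunk that follows: advance() gains an overload taking precomputed RegisterOperands, so a scheduler can collect an instruction's operands once and hand them to the tracker. A sketch of the intended call pattern, assuming an already-initialized tracker with lane tracking enabled (RPTracker, LIS, TRI and MRI are stand-in names):

RegisterOperands RegOpers;
RegOpers.collect(MI, *TRI, *MRI, /*TrackLaneMasks=*/true, /*IgnoreDead=*/false);
RegOpers.adjustLaneLiveness(*LIS, *MRI, LIS->getInstructionIndex(MI).getRegSlot());
RPTracker.advance(RegOpers); // reuses the operands instead of re-collecting them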
-void RegPressureTracker::advance() { +void RegPressureTracker::advance(const RegisterOperands &RegOpers) { assert(!TrackUntiedDefs && "unsupported mode"); - assert(CurrPos != MBB->end()); if (!isTopClosed()) closeTop(); @@ -601,39 +867,34 @@ void RegPressureTracker::advance() { static_cast<RegionPressure&>(P).openBottom(CurrPos); } - RegisterOperands RegOpers; - RegOpers.collect(*CurrPos, *TRI, *MRI); - - for (unsigned Reg : RegOpers.Uses) { - // Discover live-ins. - bool isLive = LiveRegs.contains(Reg); - if (!isLive) - discoverLiveIn(Reg); + for (const RegisterMaskPair &Use : RegOpers.Uses) { + unsigned Reg = Use.RegUnit; + LaneBitmask LiveMask = LiveRegs.contains(Reg); + LaneBitmask LiveIn = Use.LaneMask & ~LiveMask; + if (LiveIn != 0) { + discoverLiveIn(RegisterMaskPair(Reg, LiveIn)); + increaseRegPressure(Reg, LiveMask, LiveMask | LiveIn); + LiveRegs.insert(RegisterMaskPair(Reg, LiveIn)); + } // Kill liveness at last uses. - bool lastUse = false; if (RequireIntervals) { - const LiveRange *LR = getLiveRange(*LIS, Reg); - lastUse = LR && LR->Query(SlotIdx).isKill(); - } else { - // Allocatable physregs are always single-use before register rewriting. - lastUse = !TargetRegisterInfo::isVirtualRegister(Reg); + LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx); + if (LastUseMask != 0) { + LiveRegs.erase(RegisterMaskPair(Reg, LastUseMask)); + decreaseRegPressure(Reg, LiveMask, LiveMask & ~LastUseMask); + } } - if (lastUse && isLive) { - LiveRegs.erase(Reg); - decreaseRegPressure(Reg); - } else if (!lastUse && !isLive) - increaseRegPressure(Reg); } // Generate liveness for defs. - for (unsigned Reg : RegOpers.Defs) { - if (LiveRegs.insert(Reg)) - increaseRegPressure(Reg); + for (const RegisterMaskPair &Def : RegOpers.Defs) { + LaneBitmask PreviousMask = LiveRegs.insert(Def); + LaneBitmask NewMask = PreviousMask | Def.LaneMask; + increaseRegPressure(Def.RegUnit, PreviousMask, NewMask); } // Boost pressure for all dead defs together. - increaseRegPressure(RegOpers.DeadDefs); - decreaseRegPressure(RegOpers.DeadDefs); + bumpDeadDefs(RegOpers.DeadDefs); // Find the next instruction. do @@ -641,6 +902,17 @@ void RegPressureTracker::advance() { while (CurrPos != MBB->end() && CurrPos->isDebugValue()); } +void RegPressureTracker::advance() { + const MachineInstr &MI = *CurrPos; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI, TrackLaneMasks, false); + if (TrackLaneMasks) { + SlotIndex SlotIdx = getCurrSlot(); + RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); + } + advance(RegOpers); +} + /// Find the max change in excess pressure across all sets. static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, ArrayRef<unsigned> NewPressureVec, @@ -728,22 +1000,38 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); + // Account for register pressure similar to RegPressureTracker::recede(). RegisterOperands RegOpers; - RegOpers.collect(*MI, *TRI, *MRI, /*IgnoreDead=*/true); + RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, /*IgnoreDead=*/true); assert(RegOpers.DeadDefs.size() == 0); - if (RequireIntervals) + if (TrackLaneMasks) + RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); + else if (RequireIntervals) RegOpers.detectDeadDefs(*MI, *LIS); + // Boost max pressure for all dead defs together. 
+ // Since CurrSetPressure and MaxSetPressure + bumpDeadDefs(RegOpers.DeadDefs); + // Kill liveness at live defs. - for (unsigned Reg : RegOpers.Defs) { - if (!containsReg(RegOpers.Uses, Reg)) - decreaseRegPressure(Reg); + for (const RegisterMaskPair &P : RegOpers.Defs) { + unsigned Reg = P.RegUnit; + LaneBitmask LiveLanes = LiveRegs.contains(Reg); + LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg); + LaneBitmask DefLanes = P.LaneMask; + LaneBitmask LiveAfter = (LiveLanes & ~DefLanes) | UseLanes; + decreaseRegPressure(Reg, LiveLanes, LiveAfter); } // Generate liveness for uses. - for (unsigned Reg : RegOpers.Uses) { - if (!LiveRegs.contains(Reg)) - increaseRegPressure(Reg); + for (const RegisterMaskPair &P : RegOpers.Uses) { + unsigned Reg = P.RegUnit; + LaneBitmask LiveLanes = LiveRegs.contains(Reg); + LaneBitmask LiveAfter = LiveLanes | P.LaneMask; + increaseRegPressure(Reg, LiveLanes, LiveAfter); } } @@ -888,15 +1176,58 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, } /// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). -static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx, - SlotIndex NextUseIdx, const MachineRegisterInfo &MRI, - const LiveIntervals *LIS) { - for (const MachineInstr &MI : MRI.use_nodbg_instructions(Reg)) { - SlotIndex InstSlot = LIS->getInstructionIndex(&MI).getRegSlot(); - if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) - return true; +/// The query starts with a lane bitmask which gets lanes/bits removed for every +/// use we find. +static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask, + SlotIndex PriorUseIdx, SlotIndex NextUseIdx, + const MachineRegisterInfo &MRI, + const LiveIntervals *LIS) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) { + if (MO.isUndef()) + continue; + const MachineInstr *MI = MO.getParent(); + SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot(); + if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) { + unsigned SubRegIdx = MO.getSubReg(); + LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx); + LastUseMask &= ~UseMask; + if (LastUseMask == 0) + return 0; + } } - return false; + return LastUseMask; +} + +LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit, + SlotIndex Pos) const { + assert(RequireIntervals); + return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, ~0u, + [](const LiveRange &LR, SlotIndex Pos) { + return LR.liveAt(Pos); + }); +} + +LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit, + SlotIndex Pos) const { + assert(RequireIntervals); + return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, + Pos.getBaseIndex(), 0, + [](const LiveRange &LR, SlotIndex Pos) { + const LiveRange::Segment *S = LR.getSegmentContaining(Pos); + return S != nullptr && S->end == Pos.getRegSlot(); + }); +} + +LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit, + SlotIndex Pos) const { + assert(RequireIntervals); + return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, 0u, + [](const LiveRange &LR, SlotIndex Pos) { + const LiveRange::Segment *S = LR.getSegmentContaining(Pos); + return S != nullptr && S->start < Pos.getRegSlot(true) && + S->end != Pos.getDeadSlot(); + }); } /// Record the downward impact of a single instruction on current register @@ -908,39 +1239,49 @@ static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx, void 
RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); - // Account for register pressure similar to RegPressureTracker::recede(). - RegisterOperands RegOpers; - RegOpers.collect(*MI, *TRI, *MRI); - - // Kill liveness at last uses. Assume allocatable physregs are single-use - // rather than checking LiveIntervals. SlotIndex SlotIdx; if (RequireIntervals) - SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); + SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); - for (unsigned Reg : RegOpers.Uses) { - if (RequireIntervals) { + // Account for register pressure similar to RegPressureTracker::recede(). + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, *MRI, TrackLaneMasks, false); + if (TrackLaneMasks) + RegOpers.adjustLaneLiveness(*LIS, *MRI, SlotIdx); + + if (RequireIntervals) { + for (const RegisterMaskPair &Use : RegOpers.Uses) { + unsigned Reg = Use.RegUnit; + LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx); + if (LastUseMask == 0) + continue; + // The LastUseMask is queried from the liveness information of instruction + // which may be further down the schedule. Some lanes may actually not be + // last uses for the current position. // FIXME: allow the caller to pass in the list of vreg uses that remain // to be bottom-scheduled to avoid searching uses at each query. SlotIndex CurrIdx = getCurrSlot(); - const LiveRange *LR = getLiveRange(*LIS, Reg); - if (LR) { - LiveQueryResult LRQ = LR->Query(SlotIdx); - if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, *MRI, LIS)) - decreaseRegPressure(Reg); - } - } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // Allocatable physregs are always single-use before register rewriting. - decreaseRegPressure(Reg); + LastUseMask + = findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, LIS); + if (LastUseMask == 0) + continue; + + LaneBitmask LiveMask = LiveRegs.contains(Reg); + LaneBitmask NewMask = LiveMask & ~LastUseMask; + decreaseRegPressure(Reg, LiveMask, NewMask); } } // Generate liveness for defs. - increaseRegPressure(RegOpers.Defs); + for (const RegisterMaskPair &Def : RegOpers.Defs) { + unsigned Reg = Def.RegUnit; + LaneBitmask LiveMask = LiveRegs.contains(Reg); + LaneBitmask NewMask = LiveMask | Def.LaneMask; + increaseRegPressure(Reg, LiveMask, NewMask); + } // Boost pressure for all dead defs together. - increaseRegPressure(RegOpers.DeadDefs); - decreaseRegPressure(RegOpers.DeadDefs); + bumpDeadDefs(RegOpers.DeadDefs); } /// Consider the pressure increase caused by traversing this instruction diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index 8fa1bf7..6b80179 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This file implements the machine register scavenger. It can provide -// information, such as unused registers, at any point in a machine basic block. -// It also provides a mechanism to make registers available by evicting them to -// spill slots. +/// \file +/// This file implements the machine register scavenger. It can provide +/// information, such as unused registers, at any point in a machine basic +/// block. It also provides a mechanism to make registers available by evicting +/// them to spill slots. 
// //===----------------------------------------------------------------------===// @@ -30,7 +31,6 @@ using namespace llvm; #define DEBUG_TYPE "reg-scavenging" -/// setUsed - Set the register units of this register as used. void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { LaneBitmask UnitMask = (*RUI).second; @@ -49,9 +49,6 @@ void RegScavenger::initRegState() { // All register units start out unused. RegUnitsAvailable.set(); - if (!MBB) - return; - // Live-in registers are in use. for (const auto &LI : MBB->liveins()) setRegUsed(LI.PhysReg, LI.LaneMask); @@ -63,8 +60,8 @@ void RegScavenger::initRegState() { setRegUsed(I); } -void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { - MachineFunction &MF = *mbb->getParent(); +void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) { + MachineFunction &MF = *MBB.getParent(); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); @@ -78,15 +75,15 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { "Cannot use register scavenger with inaccurate liveness"); // Self-initialize. - if (!MBB) { + if (!this->MBB) { NumRegUnits = TRI->getNumRegUnits(); RegUnitsAvailable.resize(NumRegUnits); KillRegUnits.resize(NumRegUnits); DefRegUnits.resize(NumRegUnits); TmpRegUnits.resize(NumRegUnits); } + this->MBB = &MBB; - MBB = mbb; initRegState(); Tracking = false; @@ -100,17 +97,15 @@ void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) { void RegScavenger::determineKillsAndDefs() { assert(Tracking && "Must be tracking to determine kills and defs"); - MachineInstr *MI = MBBI; - assert(!MI->isDebugValue() && "Debug values have no kills or defs"); + MachineInstr &MI = *MBBI; + assert(!MI.isDebugValue() && "Debug values have no kills or defs"); // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. KillRegUnits.reset(); DefRegUnits.reset(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isRegMask()) { - TmpRegUnits.clear(); for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) { for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) { @@ -120,14 +115,14 @@ void RegScavenger::determineKillsAndDefs() { } } } - + // Apply the mask. KillRegUnits |= TmpRegUnits; } if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) + if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { @@ -149,8 +144,8 @@ void RegScavenger::determineKillsAndDefs() { void RegScavenger::unprocess() { assert(Tracking && "Cannot unprocess because we're not tracking"); - MachineInstr *MI = MBBI; - if (!MI->isDebugValue()) { + MachineInstr &MI = *MBBI; + if (!MI.isDebugValue()) { determineKillsAndDefs(); // Commit the changes. @@ -176,30 +171,29 @@ void RegScavenger::forward() { } assert(MBBI != MBB->end() && "Already at the end of the basic block!"); - MachineInstr *MI = MBBI; + MachineInstr &MI = *MBBI; for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), IE = Scavenged.end(); I != IE; ++I) { - if (I->Restore != MI) + if (I->Restore != &MI) continue; I->Reg = 0; I->Restore = nullptr; } - if (MI->isDebugValue()) + if (MI.isDebugValue()) return; determineKillsAndDefs(); // Verify uses and defs. 
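A usage sketch for the signature change above, now that enterBasicBlock() takes a reference instead of a pointer (MF and the register class RC are assumed to exist; this is not code from the commit):

RegScavenger RS;
for (MachineBasicBlock &MBB : MF) {
  RS.enterBasicBlock(MBB); // was: RS.enterBasicBlock(&MBB);
  for (unsigned I = 0, E = MBB.size(); I != E; ++I)
    RS.forward(); // advance the liveness state one instruction at a time
  unsigned FreeReg = RS.FindUnusedReg(RC); // 0 if nothing is free here
}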
#ifndef NDEBUG - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) + if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { if (MO.isUndef()) @@ -261,33 +255,24 @@ bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { } unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); - I != E; ++I) - if (!isRegUsed(*I)) { - DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) << + for (unsigned Reg : *RC) { + if (!isRegUsed(Reg)) { + DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(Reg) << "\n"); - return *I; + return Reg; } + } return 0; } -/// getRegsAvailable - Return all available registers in the register class -/// in Mask. BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { BitVector Mask(TRI->getNumRegs()); - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); - I != E; ++I) - if (!isRegUsed(*I)) - Mask.set(*I); + for (unsigned Reg : *RC) + if (!isRegUsed(Reg)) + Mask.set(Reg); return Mask; } -/// findSurvivorReg - Return the candidate register that is unused for the -/// longest after StartMII. UseMI is set to the instruction where the search -/// stopped. -/// -/// No more than InstrLimit instructions are inspected. -/// unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, BitVector &Candidates, unsigned InstrLimit, @@ -309,8 +294,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, bool isVirtKillInsn = false; bool isVirtDefInsn = false; // Remove any candidates touched by instruction. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isRegMask()) Candidates.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg() || MO.isUndef() || !MO.getReg()) @@ -345,20 +329,19 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, } // If we ran off the end, that's where we want to restore. if (MI == ME) RestorePointMI = ME; - assert (RestorePointMI != StartMI && - "No available scavenger restore location!"); + assert(RestorePointMI != StartMI && + "No available scavenger restore location!"); // We ran out of candidates, so stop the search. 
UseMI = RestorePointMI; return Survivor; } -static unsigned getFrameIndexOperandNum(MachineInstr *MI) { +static unsigned getFrameIndexOperandNum(MachineInstr &MI) { unsigned i = 0; - while (!MI->getOperand(i).isFI()) { + while (!MI.getOperand(i).isFI()) { ++i; - assert(i < MI->getNumOperands() && - "Instr doesn't have FrameIndex operand!"); + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } return i; } @@ -366,13 +349,13 @@ static unsigned getFrameIndexOperandNum(MachineInstr *MI) { unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { + MachineInstr &MI = *I; + const MachineFunction &MF = *MI.getParent()->getParent(); // Consider all allocatable registers in the register class initially - BitVector Candidates = - TRI->getAllocatableSet(*I->getParent()->getParent(), RC); + BitVector Candidates = TRI->getAllocatableSet(MF, RC); // Exclude all the registers being used by the instruction. - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &MO = I->getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && !TargetRegisterInfo::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); @@ -395,16 +378,42 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } - // Find an available scavenging slot. - unsigned SI; - for (SI = 0; SI < Scavenged.size(); ++SI) - if (Scavenged[SI].Reg == 0) - break; + // Find an available scavenging slot with size and alignment matching + // the requirements of the class RC. + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned NeedSize = RC->getSize(); + unsigned NeedAlign = RC->getAlignment(); + + unsigned SI = Scavenged.size(), Diff = UINT_MAX; + int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd(); + for (unsigned I = 0; I < Scavenged.size(); ++I) { + if (Scavenged[I].Reg != 0) + continue; + // Verify that this slot is valid for this register. + int FI = Scavenged[I].FrameIndex; + if (FI < FIB || FI >= FIE) + continue; + unsigned S = MFI.getObjectSize(FI); + unsigned A = MFI.getObjectAlignment(FI); + if (NeedSize > S || NeedAlign > A) + continue; + // Avoid wasting slots with large size and/or large alignment. Pick one + // that is the best fit for this register class (in street metric). + // Picking a larger slot than necessary could happen if a slot for a + // larger register is reserved before a slot for a smaller one. When + // trying to spill a smaller register, the large slot would be found + // first, thus making it impossible to spill the larger register later. + unsigned D = (S-NeedSize) + (A-NeedAlign); + if (D < Diff) { + SI = I; + Diff = D; + } + } if (SI == Scavenged.size()) { // We need to scavenge a register but have no spill slot, the target // must know how to do it (if not, we'll assert below). - Scavenged.push_back(ScavengedInfo()); + Scavenged.push_back(ScavengedInfo(FIE)); } // Avoid infinite regress @@ -414,13 +423,18 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, // otherwise, use the emergency stack spill slot. if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) { // Spill the scavenged register before I. 
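The slot search above is a best-fit scan; a worked example with invented slot shapes shows why minimizing D = (S - NeedSize) + (A - NeedAlign) keeps large slots free for large registers:

#include <cassert>
#include <climits>

int main() {
  unsigned NeedSize = 4, NeedAlign = 4; // requirements of the spilled class
  struct { unsigned S, A; } Slots[] = {{16, 16}, {4, 4}, {8, 4}};
  unsigned BestDiff = UINT_MAX, BestIdx = ~0u;
  for (unsigned I = 0; I != 3; ++I) {
    if (Slots[I].S < NeedSize || Slots[I].A < NeedAlign)
      continue; // slot too small or under-aligned for this class
    unsigned D = (Slots[I].S - NeedSize) + (Slots[I].A - NeedAlign);
    if (D < BestDiff) { BestDiff = D; BestIdx = I; }
  }
  assert(BestIdx == 1); // the exact fit wins; the 16-byte slot stays available
  return 0;
}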
- assert(Scavenged[SI].FrameIndex >= 0 && - "Cannot scavenge register without an emergency spill slot!"); + int FI = Scavenged[SI].FrameIndex; + if (FI < FIB || FI >= FIE) { + std::string Msg = std::string("Error while trying to spill ") + + TRI->getName(SReg) + " from class " + TRI->getRegClassName(RC) + + ": Cannot scavenge register without an emergency spill slot!"; + report_fatal_error(Msg.c_str()); + } TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, RC, TRI); MachineBasicBlock::iterator II = std::prev(I); - unsigned FIOperandNum = getFrameIndexOperandNum(II); + unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). @@ -428,11 +442,11 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, RC, TRI); II = std::prev(UseMI); - FIOperandNum = getFrameIndexOperandNum(II); + FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } - Scavenged[SI].Restore = std::prev(UseMI); + Scavenged[SI].Restore = &*std::prev(UseMI); // Doing this here leads to infinite regress. // Scavenged[SI].Reg = SReg; diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp new file mode 100644 index 0000000..5cf3e57 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp @@ -0,0 +1,93 @@ +//===- RegisterUsageInfo.cpp - Register Usage Information Storage --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This pass is required to take advantage of the interprocedural register +/// allocation infrastructure.
+
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+cl::opt<bool> DumpRegUsage(
+    "print-regusage", cl::init(false), cl::Hidden,
+    cl::desc("Print register usage details collected for analysis."));
+
+INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
+                "Register Usage Information Storage", false, true)
+
+char PhysicalRegisterUsageInfo::ID = 0;
+
+void PhysicalRegisterUsageInfo::anchor() {}
+
+bool PhysicalRegisterUsageInfo::doInitialization(Module &M) {
+  RegMasks.grow(M.size());
+  return false;
+}
+
+bool PhysicalRegisterUsageInfo::doFinalization(Module &M) {
+  if (DumpRegUsage)
+    print(errs());
+
+  RegMasks.shrink_and_clear();
+  return false;
+}
+
+void PhysicalRegisterUsageInfo::storeUpdateRegUsageInfo(
+    const Function *FP, std::vector<uint32_t> RegMask) {
+  assert(FP != nullptr && "Function * can't be nullptr.");
+  RegMasks[FP] = std::move(RegMask);
+}
+
+const std::vector<uint32_t> *
+PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) {
+  auto It = RegMasks.find(FP);
+  if (It != RegMasks.end())
+    return &(It->second);
+  return nullptr;
+}
+
+void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const {
+  const TargetRegisterInfo *TRI;
+
+  typedef std::pair<const Function *, std::vector<uint32_t>> FuncPtrRegMaskPair;
+
+  SmallVector<const FuncPtrRegMaskPair *, 64> FPRMPairVector;
+
+  // Create a vector of pointers to the RegMasks entries.
+  for (const auto &RegMask : RegMasks)
+    FPRMPairVector.push_back(&RegMask);
+
+  // Sort the vector to print the analysis in alphabetical order of function
+  // name.
+  std::sort(
+      FPRMPairVector.begin(), FPRMPairVector.end(),
+      [](const FuncPtrRegMaskPair *A, const FuncPtrRegMaskPair *B) -> bool {
+        return A->first->getName() < B->first->getName();
+      });
+
+  for (const FuncPtrRegMaskPair *FPRMPair : FPRMPairVector) {
+    OS << FPRMPair->first->getName() << " "
+       << "Clobbered Registers: ";
+    TRI = TM->getSubtarget<TargetSubtargetInfo>(*(FPRMPair->first))
+              .getRegisterInfo();
+
+    for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+      if (MachineOperand::clobbersPhysReg(&(FPRMPair->second[0]), PReg))
+        OS << TRI->getName(PReg) << " ";
+    }
+    OS << "\n";
+  }
+}
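The uint32_t vectors stored by this pass follow LLVM's usual regmask convention: one bit per physical register, where a set bit means the register is preserved across the call and a clear bit means it may be clobbered, which is what MachineOperand::clobbersPhysReg tests. A minimal sketch of that encoding; the helper names here are ours, not LLVM API:

#include <cstdint>
#include <vector>

// Test a regmask bit: set bit = register preserved, clear = clobbered.
static bool clobbersReg(const std::vector<uint32_t> &Mask, unsigned PhysReg) {
  return !(Mask[PhysReg / 32] & (1u << (PhysReg % 32)));
}

// Build a mask over NumRegs registers that preserves everything, then
// mark a single register as clobbered.
static std::vector<uint32_t> makeMask(unsigned NumRegs, unsigned Clobbered) {
  std::vector<uint32_t> Mask((NumRegs + 31) / 32, ~0u);
  Mask[Clobbered / 32] &= ~(1u << (Clobbered % 32));
  return Mask;
}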
diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
new file mode 100644
index 0000000..ea952d9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -0,0 +1,388 @@
+//===-- RenameIndependentSubregs.cpp - Live Interval Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// This pass looks for virtual registers with independently used
+/// subregisters and renames them to new virtual registers.
+/// Example: In the following:
+/// %vreg0:sub0<read-undef> = ...
+/// %vreg0:sub1 = ...
+/// use %vreg0:sub0
+/// %vreg0:sub0 = ...
+/// use %vreg0:sub0
+/// use %vreg0:sub1
+/// sub0 and sub1 are never used together, and we have two independent sub0
+/// definitions. This pass will rename to:
+/// %vreg0:sub0<read-undef> = ...
+/// %vreg1:sub1<read-undef> = ...
+/// use %vreg1:sub1
+/// %vreg2:sub1<read-undef> = ...
+/// use %vreg2:sub1
+/// use %vreg0:sub0
+//
+//===----------------------------------------------------------------------===//
+
+#include "LiveRangeUtils.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "rename-independent-subregs"
+
+namespace {
+
+class RenameIndependentSubregs : public MachineFunctionPass {
+public:
+  static char ID;
+  RenameIndependentSubregs() : MachineFunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "Rename Disconnected Subregister Components";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addRequired<SlotIndexes>();
+    AU.addPreserved<SlotIndexes>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  struct SubRangeInfo {
+    ConnectedVNInfoEqClasses ConEQ;
+    LiveInterval::SubRange *SR;
+    unsigned Index;
+
+    SubRangeInfo(LiveIntervals &LIS, LiveInterval::SubRange &SR,
+                 unsigned Index)
+      : ConEQ(LIS), SR(&SR), Index(Index) {}
+  };
+
+  /// Split unrelated subregister components and rename them to new vregs.
+  bool renameComponents(LiveInterval &LI) const;
+
+  /// \brief Build a vector of SubRange infos and a union-find set of
+  /// equivalence classes.
+  /// Returns true if more than 1 equivalence class was found.
+  bool findComponents(IntEqClasses &Classes,
+                      SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+                      LiveInterval &LI) const;
+
+  /// \brief Distribute the LiveInterval segments into the new LiveIntervals
+  /// belonging to their class.
+  void distribute(const IntEqClasses &Classes,
+                  const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+                  const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+  /// \brief Construct the main live range and add missing undef+dead flags.
+  void computeMainRangesFixFlags(const IntEqClasses &Classes,
+      const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+      const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+  /// Rewrite Machine Operands to use the new vreg belonging to their class.
+  void rewriteOperands(const IntEqClasses &Classes,
+                       const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+                       const SmallVectorImpl<LiveInterval*> &Intervals) const;
+
+
+  LiveIntervals *LIS;
+  MachineRegisterInfo *MRI;
+  const TargetInstrInfo *TII;
+};
+
+} // end anonymous namespace
+
+char RenameIndependentSubregs::ID;
+
+char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID;
+
+INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, "rename-independent-subregs",
+                      "Rename Independent Subregisters", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(RenameIndependentSubregs, "rename-independent-subregs",
+                    "Rename Independent Subregisters", false, false)
+
+bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
+  // Shortcut: We cannot have split components with a single definition.
+  if (LI.valnos.size() < 2)
+    return false;
+
+  SmallVector<SubRangeInfo, 4> SubRangeInfos;
+  IntEqClasses Classes;
+  if (!findComponents(Classes, SubRangeInfos, LI))
+    return false;
+
+  // Create a new VReg for each class.
+  unsigned Reg = LI.reg;
+  const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
+  SmallVector<LiveInterval*, 4> Intervals;
+  Intervals.push_back(&LI);
+  DEBUG(dbgs() << PrintReg(Reg) << ": Found " << Classes.getNumClasses()
+        << " equivalence classes.\n");
+  DEBUG(dbgs() << PrintReg(Reg) << ": Splitting into newly created:");
+  for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses;
+       ++I) {
+    unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+    LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg);
+    Intervals.push_back(&NewLI);
+    DEBUG(dbgs() << ' ' << PrintReg(NewVReg));
+  }
+  DEBUG(dbgs() << '\n');
+
+  rewriteOperands(Classes, SubRangeInfos, Intervals);
+  distribute(Classes, SubRangeInfos, Intervals);
+  computeMainRangesFixFlags(Classes, SubRangeInfos, Intervals);
+  return true;
+}
+
+bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes,
+    SmallVectorImpl<RenameIndependentSubregs::SubRangeInfo> &SubRangeInfos,
+    LiveInterval &LI) const {
+  // First step: Create connected components for the VNInfos inside the
+  // subranges and count the global number of such components.
+  unsigned NumComponents = 0;
+  for (LiveInterval::SubRange &SR : LI.subranges()) {
+    SubRangeInfos.push_back(SubRangeInfo(*LIS, SR, NumComponents));
+    ConnectedVNInfoEqClasses &ConEQ = SubRangeInfos.back().ConEQ;
+
+    unsigned NumSubComponents = ConEQ.Classify(SR);
+    NumComponents += NumSubComponents;
+  }
+  // Shortcut: With only 1 subrange, the normal separate component tests are
+  // enough and we do not need to perform the union-find on the subregister
+  // segments.
+  if (SubRangeInfos.size() < 2)
+    return false;
+
+  // Next step: Build union-find structure over all subranges and merge classes
+  // across subranges when they are affected by the same MachineOperand.
+  const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+  Classes.grow(NumComponents);
+  unsigned Reg = LI.reg;
+  for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+    if (!MO.isDef() && !MO.readsReg())
+      continue;
+    unsigned SubRegIdx = MO.getSubReg();
+    LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+    unsigned MergedID = ~0u;
+    for (RenameIndependentSubregs::SubRangeInfo &SRInfo : SubRangeInfos) {
+      const LiveInterval::SubRange &SR = *SRInfo.SR;
+      if ((SR.LaneMask & LaneMask) == 0)
+        continue;
+      SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+      Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
+                       : Pos.getBaseIndex();
+      const VNInfo *VNI = SR.getVNInfoAt(Pos);
+      if (VNI == nullptr)
+        continue;
+
+      // Map to the local representative ID.
+      unsigned LocalID = SRInfo.ConEQ.getEqClass(VNI);
+      // Global ID
+      unsigned ID = LocalID + SRInfo.Index;
+      // Merge with the other classes affected by this operand.
+      MergedID = MergedID == ~0u ? ID : Classes.join(MergedID, ID);
+    }
+  }
+
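The merging above can be pictured with a plain union-find in place of llvm::IntEqClasses: each subrange contributes a block of global component IDs (local ID plus SRInfo.Index), and every operand joins the components of all subranges its lane mask touches. A self-contained sketch of that step:

#include <numeric>
#include <vector>

// Minimal union-find, standing in for llvm::IntEqClasses.
struct UnionFind {
  std::vector<unsigned> Parent;
  explicit UnionFind(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0u);
  }
  unsigned find(unsigned X) {
    while (Parent[X] != X)
      X = Parent[X] = Parent[Parent[X]];  // path halving
    return X;
  }
  unsigned join(unsigned A, unsigned B) {
    A = find(A); B = find(B);
    if (A != B) Parent[B] = A;
    return A;
  }
};

// Example: subrange 0 has components {0,1}, subrange 1 has component {2}
// (global IDs 0..2). An operand reading both lanes joins 1 and 2, leaving
// two classes, {0} and {1,2}, so the vreg splits into two new vregs.
static unsigned countClasses() {
  UnionFind UF(3);
  UF.join(1, 2);
  unsigned Classes = 0;
  for (unsigned I = 0; I < 3; ++I)
    if (UF.find(I) == I)
      ++Classes;
  return Classes; // 2
}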
+  // Early exit if we ended up with a single equivalence class.
+  Classes.compress();
+  unsigned NumClasses = Classes.getNumClasses();
+  return NumClasses > 1;
+}
+
+void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
+    const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+    const SmallVectorImpl<LiveInterval*> &Intervals) const {
+  const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
+  unsigned Reg = Intervals[0]->reg;
+  for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg),
+       E = MRI->reg_nodbg_end(); I != E; ) {
+    MachineOperand &MO = *I++;
+    if (!MO.isDef() && !MO.readsReg())
+      continue;
+
+    MachineInstr &MI = *MO.getParent();
+
+    SlotIndex Pos = LIS->getInstructionIndex(MI);
+    unsigned SubRegIdx = MO.getSubReg();
+    LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
+
+    unsigned ID = ~0u;
+    for (const SubRangeInfo &SRInfo : SubRangeInfos) {
+      const LiveInterval::SubRange &SR = *SRInfo.SR;
+      if ((SR.LaneMask & LaneMask) == 0)
+        continue;
+      LiveRange::const_iterator I = SR.find(Pos);
+      if (I == SR.end())
+        continue;
+
+      const VNInfo &VNI = *I->valno;
+      // Map to the local representative ID.
+      unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI);
+      // Global ID
+      ID = Classes[LocalID + SRInfo.Index];
+      break;
+    }
+
+    unsigned VReg = Intervals[ID]->reg;
+    MO.setReg(VReg);
+  }
+  // TODO: We could attempt to recompute new register classes while visiting
+  // the operands: Some of the split registers may be fine with less
+  // constrained classes than the original vreg.
+}
+
+void RenameIndependentSubregs::distribute(const IntEqClasses &Classes,
+    const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+    const SmallVectorImpl<LiveInterval*> &Intervals) const {
+  unsigned NumClasses = Classes.getNumClasses();
+  SmallVector<unsigned, 8> VNIMapping;
+  SmallVector<LiveInterval::SubRange*, 8> SubRanges;
+  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+  for (const SubRangeInfo &SRInfo : SubRangeInfos) {
+    LiveInterval::SubRange &SR = *SRInfo.SR;
+    unsigned NumValNos = SR.valnos.size();
+    VNIMapping.clear();
+    VNIMapping.reserve(NumValNos);
+    SubRanges.clear();
+    SubRanges.resize(NumClasses-1, nullptr);
+    for (unsigned I = 0; I < NumValNos; ++I) {
+      const VNInfo &VNI = *SR.valnos[I];
+      unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI);
+      unsigned ID = Classes[LocalID + SRInfo.Index];
+      VNIMapping.push_back(ID);
+      if (ID > 0 && SubRanges[ID-1] == nullptr)
+        SubRanges[ID-1] = Intervals[ID]->createSubRange(Allocator, SR.LaneMask);
+    }
+    DistributeRange(SR, SubRanges.data(), VNIMapping);
+  }
+}
+
+static bool subRangeLiveAt(const LiveInterval &LI, SlotIndex Pos) {
+  for (const LiveInterval::SubRange &SR : LI.subranges()) {
+    if (SR.liveAt(Pos))
+      return true;
+  }
+  return false;
+}
+
+void RenameIndependentSubregs::computeMainRangesFixFlags(
+    const IntEqClasses &Classes,
+    const SmallVectorImpl<SubRangeInfo> &SubRangeInfos,
+    const SmallVectorImpl<LiveInterval*> &Intervals) const {
+  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+  const SlotIndexes &Indexes = *LIS->getSlotIndexes();
+  for (size_t I = 0, E = Intervals.size(); I < E; ++I) {
+    LiveInterval &LI = *Intervals[I];
+    unsigned Reg = LI.reg;
+
+    LI.removeEmptySubRanges();
+
+    // There must be a def (or live-in) before every use. Splitting vregs may
+    // violate this principle as the split vreg may not have a definition on
+    // every path. Fix this by creating IMPLICIT_DEF instructions as
+    // necessary.
+    for (const LiveInterval::SubRange &SR : LI.subranges()) {
+      // Search for "PHI" value numbers in the subranges.
+      // We must find a live value in each predecessor block, and add an
+      // IMPLICIT_DEF where it is missing.
+      for (unsigned I = 0; I < SR.valnos.size(); ++I) {
+        const VNInfo &VNI = *SR.valnos[I];
+        if (VNI.isUnused() || !VNI.isPHIDef())
+          continue;
+
+        SlotIndex Def = VNI.def;
+        MachineBasicBlock &MBB = *Indexes.getMBBFromIndex(Def);
+        for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+          SlotIndex PredEnd = Indexes.getMBBEndIdx(PredMBB);
+          if (subRangeLiveAt(LI, PredEnd.getPrevSlot()))
+            continue;
+
+          MachineBasicBlock::iterator InsertPos =
+            llvm::findPHICopyInsertPoint(PredMBB, &MBB, Reg);
+          const MCInstrDesc &MCDesc = TII->get(TargetOpcode::IMPLICIT_DEF);
+          MachineInstrBuilder ImpDef = BuildMI(*PredMBB, InsertPos,
+                                               DebugLoc(), MCDesc, Reg);
+          SlotIndex DefIdx = LIS->InsertMachineInstrInMaps(*ImpDef);
+          SlotIndex RegDefIdx = DefIdx.getRegSlot();
+          for (LiveInterval::SubRange &SR : LI.subranges()) {
+            VNInfo *SRVNI = SR.getNextValue(RegDefIdx, Allocator);
+            SR.addSegment(LiveRange::Segment(RegDefIdx, PredEnd, SRVNI));
+          }
+        }
+      }
+    }
+
+    for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+      if (!MO.isDef())
+        continue;
+      unsigned SubRegIdx = MO.getSubReg();
+      if (SubRegIdx == 0)
+        continue;
+      // After assigning the new vreg we may not have any other sublanes
+      // living in and out of the instruction anymore. We need to add new
+      // dead and undef flags in these cases.
+      if (!MO.isUndef()) {
+        SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+        if (!subRangeLiveAt(LI, Pos))
+          MO.setIsUndef();
+      }
+      if (!MO.isDead()) {
+        SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()).getDeadSlot();
+        if (!subRangeLiveAt(LI, Pos))
+          MO.setIsDead();
+      }
+    }
+
+    if (I == 0)
+      LI.clear();
+    LIS->constructMainRangeFromSubranges(LI);
+  }
+}
+
+bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
+  // Skip renaming if subregister liveness is not tracked.
+  if (!MF.getSubtarget().enableSubRegLiveness())
+    return false;
+
+  DEBUG(dbgs() << "Renaming independent subregister live ranges in "
+        << MF.getName() << '\n');
+
+  LIS = &getAnalysis<LiveIntervals>();
+  MRI = &MF.getRegInfo();
+  TII = MF.getSubtarget().getInstrInfo();
+
+  // Iterate over all vregs. Note that we only query getNumVirtRegs() once:
+  // newly created vregs end up with higher numbers but do not need to be
+  // visited, as there can't be any further splitting.
+  bool Changed = false;
+  for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+    if (!LIS->hasInterval(Reg))
+      continue;
+    LiveInterval &LI = LIS->getInterval(Reg);
+    if (!LI.hasSubRanges())
+      continue;
+
+    Changed |= renameComponents(LI);
+  }
+
+  return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
new file mode 100644
index 0000000..4a1b995
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -0,0 +1,851 @@
+//===-- SafeStack.cpp - Safe Stack Insertion ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass splits the stack into the safe stack (kept as-is for the LLVM
+// backend) and the unsafe stack (explicitly allocated and managed through
+// the runtime support library).
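As a source-level illustration of the split (our example, not from the patch): a local whose address never escapes can stay on the safe stack, while a buffer handed to unknown code moves to the unsafe stack, so that only statically provable accesses remain near the return address.

extern void fill(char *buf, int n); // unknown external code

int example() {
  int counter = 0;       // address never escapes: stays on the safe stack
  char buf[64];          // escapes via fill(): moved to the unsafe stack
  fill(buf, sizeof buf);
  for (int i = 0; i < 64; ++i)
    counter += buf[i];   // in-bounds accesses, but buf already escaped
  return counter;
}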
+// +// http://clang.llvm.org/docs/SafeStack.html +// +//===----------------------------------------------------------------------===// + +#include "SafeStackColoring.h" +#include "SafeStackLayout.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; +using namespace llvm::safestack; + +#define DEBUG_TYPE "safestack" + +enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP }; + +static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage", + cl::Hidden, cl::init(ThreadLocalUSP), + cl::desc("Type of storage for the unsafe stack pointer"), + cl::values(clEnumValN(ThreadLocalUSP, "thread-local", + "Thread-local storage"), + clEnumValN(SingleThreadUSP, "single-thread", + "Non-thread-local storage"), + clEnumValEnd)); + +namespace llvm { + +STATISTIC(NumFunctions, "Total number of functions"); +STATISTIC(NumUnsafeStackFunctions, "Number of functions with unsafe stack"); +STATISTIC(NumUnsafeStackRestorePointsFunctions, + "Number of functions that use setjmp or exceptions"); + +STATISTIC(NumAllocas, "Total number of allocas"); +STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas"); +STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas"); +STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments"); +STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads"); + +} // namespace llvm + +namespace { + +/// Rewrite an SCEV expression for a memory access address to an expression that +/// represents offset from the given alloca. +/// +/// The implementation simply replaces all mentions of the alloca with zero. +class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> { + const Value *AllocaPtr; + +public: + AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr) + : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {} + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (Expr->getValue() == AllocaPtr) + return SE.getZero(Expr->getType()); + return Expr; + } +}; + +/// The SafeStack pass splits the stack of each function into the safe +/// stack, which is only accessed through memory safe dereferences (as +/// determined statically), and the unsafe stack, which contains all +/// local variables that are accessed in ways that we can't prove to +/// be safe. 
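IsAccessSafe (further below) asks ScalarEvolution for the unsigned range of the access offset after AllocaOffsetRewriter has replaced the alloca itself with zero, then checks that the whole access stays inside the object. The same containment test on plain integers, as a rough sketch of the idea; the function name and signature are ours:

#include <cstdint>

// Conservative analogue of the ConstantRange check in IsAccessSafe:
// every possible offset in [MinOff, MaxOff] must keep the whole access
// of AccessSize bytes inside the AllocaSize-byte object.
static bool accessIsSafe(uint64_t MinOff, uint64_t MaxOff,
                         uint64_t AccessSize, uint64_t AllocaSize) {
  if (AccessSize > AllocaSize)
    return false;
  if (MaxOff + AccessSize < MaxOff) // reject wrap-around
    return false;
  return MinOff <= MaxOff && MaxOff + AccessSize <= AllocaSize;
}
// accessIsSafe(0, 60, 4, 64) -> true; accessIsSafe(0, 61, 4, 64) -> false.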
+class SafeStack : public FunctionPass { + const TargetMachine *TM; + const TargetLoweringBase *TL; + const DataLayout *DL; + ScalarEvolution *SE; + + Type *StackPtrTy; + Type *IntPtrTy; + Type *Int32Ty; + Type *Int8Ty; + + Value *UnsafeStackPtr = nullptr; + + /// Unsafe stack alignment. Each stack frame must ensure that the stack is + /// aligned to this value. We need to re-align the unsafe stack if the + /// alignment of any object on the stack exceeds this value. + /// + /// 16 seems like a reasonable upper bound on the alignment of objects that we + /// might expect to appear on the stack on most common targets. + enum { StackAlignment = 16 }; + + /// \brief Build a value representing a pointer to the unsafe stack pointer. + Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F); + + /// \brief Return the value of the stack canary. + Value *getStackGuard(IRBuilder<> &IRB, Function &F); + + /// \brief Load stack guard from the frame and check if it has changed. + void checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI, + AllocaInst *StackGuardSlot, Value *StackGuard); + + /// \brief Find all static allocas, dynamic allocas, return instructions and + /// stack restore points (exception unwind blocks and setjmp calls) in the + /// given function and append them to the respective vectors. + void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas, + SmallVectorImpl<AllocaInst *> &DynamicAllocas, + SmallVectorImpl<Argument *> &ByValArguments, + SmallVectorImpl<ReturnInst *> &Returns, + SmallVectorImpl<Instruction *> &StackRestorePoints); + + /// \brief Calculate the allocation size of a given alloca. Returns 0 if the + /// size can not be statically determined. + uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI); + + /// \brief Allocate space for all static allocas in \p StaticAllocas, + /// replace allocas with pointers into the unsafe stack and generate code to + /// restore the stack pointer before all return instructions in \p Returns. + /// + /// \returns A pointer to the top of the unsafe stack after all unsafe static + /// allocas are allocated. + Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F, + ArrayRef<AllocaInst *> StaticAllocas, + ArrayRef<Argument *> ByValArguments, + ArrayRef<ReturnInst *> Returns, + Instruction *BasePointer, + AllocaInst *StackGuardSlot); + + /// \brief Generate code to restore the stack after all stack restore points + /// in \p StackRestorePoints. + /// + /// \returns A local variable in which to maintain the dynamic top of the + /// unsafe stack if needed. + AllocaInst * + createStackRestorePoints(IRBuilder<> &IRB, Function &F, + ArrayRef<Instruction *> StackRestorePoints, + Value *StaticTop, bool NeedDynamicTop); + + /// \brief Replace all allocas in \p DynamicAllocas with code to allocate + /// space dynamically on the unsafe stack and store the dynamic unsafe stack + /// top to \p DynamicTop if non-null. + void moveDynamicAllocasToUnsafeStack(Function &F, Value *UnsafeStackPtr, + AllocaInst *DynamicTop, + ArrayRef<AllocaInst *> DynamicAllocas); + + bool IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize); + + bool IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U, + const Value *AllocaPtr, uint64_t AllocaSize); + bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr, + uint64_t AllocaSize); + +public: + static char ID; // Pass identification, replacement for typeid. 
+ SafeStack(const TargetMachine *TM) + : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) { + initializeSafeStackPass(*PassRegistry::getPassRegistry()); + } + SafeStack() : SafeStack(nullptr) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<ScalarEvolutionWrapperPass>(); + } + + bool doInitialization(Module &M) override { + DL = &M.getDataLayout(); + + StackPtrTy = Type::getInt8PtrTy(M.getContext()); + IntPtrTy = DL->getIntPtrType(M.getContext()); + Int32Ty = Type::getInt32Ty(M.getContext()); + Int8Ty = Type::getInt8Ty(M.getContext()); + + return false; + } + + bool runOnFunction(Function &F) override; +}; // class SafeStack + +uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { + uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType()); + if (AI->isArrayAllocation()) { + auto C = dyn_cast<ConstantInt>(AI->getArraySize()); + if (!C) + return 0; + Size *= C->getZExtValue(); + } + return Size; +} + +bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize, + const Value *AllocaPtr, uint64_t AllocaSize) { + AllocaOffsetRewriter Rewriter(*SE, AllocaPtr); + const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr)); + + uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType()); + ConstantRange AccessStartRange = SE->getUnsignedRange(Expr); + ConstantRange SizeRange = + ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize)); + ConstantRange AccessRange = AccessStartRange.add(SizeRange); + ConstantRange AllocaRange = + ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize)); + bool Safe = AllocaRange.contains(AccessRange); + + DEBUG(dbgs() << "[SafeStack] " + << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ") + << *AllocaPtr << "\n" + << " Access " << *Addr << "\n" + << " SCEV " << *Expr + << " U: " << SE->getUnsignedRange(Expr) + << ", S: " << SE->getSignedRange(Expr) << "\n" + << " Range " << AccessRange << "\n" + << " AllocaRange " << AllocaRange << "\n" + << " " << (Safe ? "safe" : "unsafe") << "\n"); + + return Safe; +} + +bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U, + const Value *AllocaPtr, + uint64_t AllocaSize) { + // All MemIntrinsics have destination address in Arg0 and size in Arg2. + if (MI->getRawDest() != U) return true; + const auto *Len = dyn_cast<ConstantInt>(MI->getLength()); + // Non-constant size => unsafe. FIXME: try SCEV getRange. + if (!Len) return false; + return IsAccessSafe(U, Len->getZExtValue(), AllocaPtr, AllocaSize); +} + +/// Check whether a given allocation must be put on the safe +/// stack or not. The function analyzes all uses of AI and checks whether it is +/// only accessed in a memory safe way (as decided statically). +bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { + // Go through all uses of this alloca and check whether all accesses to the + // allocated object are statically known to be memory safe and, hence, the + // object can be placed on the safe stack. + SmallPtrSet<const Value *, 16> Visited; + SmallVector<const Value *, 8> WorkList; + WorkList.push_back(AllocaPtr); + + // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc. 
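The check is a worklist DFS over the alloca's transitive users, as the loop below shows: loads and stores get range-checked, escaping uses (stored pointers, returns, capturing calls) bail out, and bitcasts/GEPs/PHIs are pushed back for further inspection. The bare traversal skeleton, with hypothetical accessors standing in for the IR walk:

#include <unordered_set>
#include <vector>

struct Value;                           // opaque IR value (assumed)
std::vector<Value *> usersOf(Value *);  // hypothetical accessor
bool useEscapes(Value *);               // store of ptr, ret, capturing call

// Returns false as soon as any transitive use may escape.
static bool allUsesSafe(Value *Alloca) {
  std::vector<Value *> WorkList{Alloca};
  std::unordered_set<Value *> Visited{Alloca};
  while (!WorkList.empty()) {
    Value *V = WorkList.back();
    WorkList.pop_back();
    for (Value *U : usersOf(V)) {
      if (useEscapes(U))
        return false;
      if (Visited.insert(U).second)     // bitcast/GEP/PHI: keep walking
        WorkList.push_back(U);
    }
  }
  return true;
}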
+ while (!WorkList.empty()) { + const Value *V = WorkList.pop_back_val(); + for (const Use &UI : V->uses()) { + auto I = cast<const Instruction>(UI.getUser()); + assert(V == UI.get()); + + switch (I->getOpcode()) { + case Instruction::Load: { + if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr, + AllocaSize)) + return false; + break; + } + case Instruction::VAArg: + // "va-arg" from a pointer is safe. + break; + case Instruction::Store: { + if (V == I->getOperand(0)) { + // Stored the pointer - conservatively assume it may be unsafe. + DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr + << "\n store of address: " << *I << "\n"); + return false; + } + + if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()), + AllocaPtr, AllocaSize)) + return false; + break; + } + case Instruction::Ret: { + // Information leak. + return false; + } + + case Instruction::Call: + case Instruction::Invoke: { + ImmutableCallSite CS(I); + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + } + + if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) { + DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr + << "\n unsafe memintrinsic: " << *I + << "\n"); + return false; + } + continue; + } + + // LLVM 'nocapture' attribute is only set for arguments whose address + // is not stored, passed around, or used in any other non-trivial way. + // We assume that passing a pointer to an object as a 'nocapture + // readnone' argument is safe. + // FIXME: a more precise solution would require an interprocedural + // analysis here, which would look at all uses of an argument inside + // the function being called. + ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); + for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) + if (A->get() == V) + if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) || + CS.doesNotAccessMemory()))) { + DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr + << "\n unsafe call: " << *I << "\n"); + return false; + } + continue; + } + + default: + if (Visited.insert(I).second) + WorkList.push_back(cast<const Instruction>(I)); + } + } + } + + // All uses of the alloca are safe, we can place it on the safe stack. + return true; +} + +Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) { + // Check if there is a target-specific location for the unsafe stack pointer. + if (TL) + if (Value *V = TL->getSafeStackPointerLocation(IRB)) + return V; + + // Otherwise, assume the target links with compiler-rt, which provides a + // thread-local variable with a magic name. + Module &M = *F.getParent(); + const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr"; + auto UnsafeStackPtr = + dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar)); + + bool UseTLS = USPStorage == ThreadLocalUSP; + + if (!UnsafeStackPtr) { + auto TLSModel = UseTLS ? + GlobalValue::InitialExecTLSModel : + GlobalValue::NotThreadLocal; + // The global variable is not defined yet, define it ourselves. + // We use the initial-exec TLS model because we do not support the + // variable living anywhere other than in the main executable. 
+ UnsafeStackPtr = new GlobalVariable( + M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, + UnsafeStackPtrVar, nullptr, TLSModel); + } else { + // The variable exists, check its type and attributes. + if (UnsafeStackPtr->getValueType() != StackPtrTy) + report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type"); + if (UseTLS != UnsafeStackPtr->isThreadLocal()) + report_fatal_error(Twine(UnsafeStackPtrVar) + " must " + + (UseTLS ? "" : "not ") + "be thread-local"); + } + return UnsafeStackPtr; +} + +Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) { + Value *StackGuardVar = nullptr; + if (TL) + StackGuardVar = TL->getIRStackGuard(IRB); + if (!StackGuardVar) + StackGuardVar = + F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy); + return IRB.CreateLoad(StackGuardVar, "StackGuard"); +} + +void SafeStack::findInsts(Function &F, + SmallVectorImpl<AllocaInst *> &StaticAllocas, + SmallVectorImpl<AllocaInst *> &DynamicAllocas, + SmallVectorImpl<Argument *> &ByValArguments, + SmallVectorImpl<ReturnInst *> &Returns, + SmallVectorImpl<Instruction *> &StackRestorePoints) { + for (Instruction &I : instructions(&F)) { + if (auto AI = dyn_cast<AllocaInst>(&I)) { + ++NumAllocas; + + uint64_t Size = getStaticAllocaAllocationSize(AI); + if (IsSafeStackAlloca(AI, Size)) + continue; + + if (AI->isStaticAlloca()) { + ++NumUnsafeStaticAllocas; + StaticAllocas.push_back(AI); + } else { + ++NumUnsafeDynamicAllocas; + DynamicAllocas.push_back(AI); + } + } else if (auto RI = dyn_cast<ReturnInst>(&I)) { + Returns.push_back(RI); + } else if (auto CI = dyn_cast<CallInst>(&I)) { + // setjmps require stack restore. + if (CI->getCalledFunction() && CI->canReturnTwice()) + StackRestorePoints.push_back(CI); + } else if (auto LP = dyn_cast<LandingPadInst>(&I)) { + // Exception landing pads require stack restore. + StackRestorePoints.push_back(LP); + } else if (auto II = dyn_cast<IntrinsicInst>(&I)) { + if (II->getIntrinsicID() == Intrinsic::gcroot) + llvm::report_fatal_error( + "gcroot intrinsic not compatible with safestack attribute"); + } + } + for (Argument &Arg : F.args()) { + if (!Arg.hasByValAttr()) + continue; + uint64_t Size = + DL->getTypeStoreSize(Arg.getType()->getPointerElementType()); + if (IsSafeStackAlloca(&Arg, Size)) + continue; + + ++NumUnsafeByValArguments; + ByValArguments.push_back(&Arg); + } +} + +AllocaInst * +SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F, + ArrayRef<Instruction *> StackRestorePoints, + Value *StaticTop, bool NeedDynamicTop) { + assert(StaticTop && "The stack top isn't set."); + + if (StackRestorePoints.empty()) + return nullptr; + + // We need the current value of the shadow stack pointer to restore + // after longjmp or exception catching. + + // FIXME: On some platforms this could be handled by the longjmp/exception + // runtime itself. + + AllocaInst *DynamicTop = nullptr; + if (NeedDynamicTop) { + // If we also have dynamic alloca's, the stack pointer value changes + // throughout the function. For now we store it in an alloca. + DynamicTop = IRB.CreateAlloca(StackPtrTy, /*ArraySize=*/nullptr, + "unsafe_stack_dynamic_ptr"); + IRB.CreateStore(StaticTop, DynamicTop); + } + + // Restore current stack pointer after longjmp/exception catch. + for (Instruction *I : StackRestorePoints) { + ++NumUnsafeStackRestorePoints; + + IRB.SetInsertPoint(I->getNextNode()); + Value *CurrentTop = DynamicTop ? 
+        IRB.CreateLoad(DynamicTop) : StaticTop;
+    IRB.CreateStore(CurrentTop, UnsafeStackPtr);
+  }
+
+  return DynamicTop;
+}
+
+void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
+                                AllocaInst *StackGuardSlot, Value *StackGuard) {
+  Value *V = IRB.CreateLoad(StackGuardSlot);
+  Value *Cmp = IRB.CreateICmpNE(StackGuard, V);
+
+  auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true);
+  auto FailureProb = BranchProbabilityInfo::getBranchProbStackProtector(false);
+  MDNode *Weights = MDBuilder(F.getContext())
+                        .createBranchWeights(SuccessProb.getNumerator(),
+                                             FailureProb.getNumerator());
+  Instruction *CheckTerm =
+      SplitBlockAndInsertIfThen(Cmp, &RI,
+                                /* Unreachable */ true, Weights);
+  IRBuilder<> IRBFail(CheckTerm);
+  // FIXME: respect -fsanitize-trap / -ftrap-function here?
+  Constant *StackChkFail = F.getParent()->getOrInsertFunction(
+      "__stack_chk_fail", IRB.getVoidTy(), nullptr);
+  IRBFail.CreateCall(StackChkFail, {});
+}
+
+/// We explicitly compute and set the unsafe stack layout for all unsafe
+/// static alloca instructions. We save the unsafe "base pointer" in the
+/// prologue into a local variable and restore it in the epilogue.
+Value *SafeStack::moveStaticAllocasToUnsafeStack(
+    IRBuilder<> &IRB, Function &F, ArrayRef<AllocaInst *> StaticAllocas,
+    ArrayRef<Argument *> ByValArguments, ArrayRef<ReturnInst *> Returns,
+    Instruction *BasePointer, AllocaInst *StackGuardSlot) {
+  if (StaticAllocas.empty() && ByValArguments.empty())
+    return BasePointer;
+
+  DIBuilder DIB(*F.getParent());
+
+  StackColoring SSC(F, StaticAllocas);
+  SSC.run();
+  SSC.removeAllMarkers();
+
+  // The unsafe stack always grows down.
+  StackLayout SSL(StackAlignment);
+  if (StackGuardSlot) {
+    Type *Ty = StackGuardSlot->getAllocatedType();
+    unsigned Align =
+        std::max(DL->getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
+    SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
+                  Align, SSC.getFullLiveRange());
+  }
+
+  for (Argument *Arg : ByValArguments) {
+    Type *Ty = Arg->getType()->getPointerElementType();
+    uint64_t Size = DL->getTypeStoreSize(Ty);
+    if (Size == 0)
+      Size = 1; // Don't create zero-sized stack objects.
+
+    // Ensure the object is properly aligned.
+    unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty),
+                              Arg->getParamAlignment());
+    SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
+  }
+
+  for (AllocaInst *AI : StaticAllocas) {
+    Type *Ty = AI->getAllocatedType();
+    uint64_t Size = getStaticAllocaAllocationSize(AI);
+    if (Size == 0)
+      Size = 1; // Don't create zero-sized stack objects.
+
+    // Ensure the object is properly aligned.
+    unsigned Align =
+        std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment());
+
+    SSL.addObject(AI, Size, Align, SSC.getLiveRange(AI));
+  }
+
+  SSL.computeLayout();
+  unsigned FrameAlignment = SSL.getFrameAlignment();
+
+  // FIXME: tell SSL that we start at a less-than-MaxAlignment aligned
+  // location (AlignmentSkew).
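The re-alignment just below rounds the base pointer down with the usual power-of-two mask trick, addr & ~(Align - 1). In isolation (assuming Align is a power of two):

#include <cassert>
#include <cstdint>

// Round an address down to a power-of-two alignment, as the
// PtrToInt/And/IntToPtr sequence below does in IR.
static uint64_t alignDown(uint64_t Addr, uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "power of two expected");
  return Addr & ~(Align - 1);
}
// alignDown(0x1007, 16) == 0x1000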
+  if (FrameAlignment > StackAlignment) {
+    // Re-align the base pointer according to the max requested alignment.
+    assert(isPowerOf2_32(FrameAlignment));
+    IRB.SetInsertPoint(BasePointer->getNextNode());
+    BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
+        IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
+                      ConstantInt::get(IntPtrTy, ~uint64_t(FrameAlignment - 1))),
+        StackPtrTy));
+  }
+
+  IRB.SetInsertPoint(BasePointer->getNextNode());
+
+  if (StackGuardSlot) {
+    unsigned Offset = SSL.getObjectOffset(StackGuardSlot);
+    Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+                               ConstantInt::get(Int32Ty, -Offset));
+    Value *NewAI =
+        IRB.CreateBitCast(Off, StackGuardSlot->getType(), "StackGuardSlot");
+
+    // Replace the alloca with the new location.
+    StackGuardSlot->replaceAllUsesWith(NewAI);
+    StackGuardSlot->eraseFromParent();
+  }
+
+  for (Argument *Arg : ByValArguments) {
+    unsigned Offset = SSL.getObjectOffset(Arg);
+    Type *Ty = Arg->getType()->getPointerElementType();
+
+    uint64_t Size = DL->getTypeStoreSize(Ty);
+    if (Size == 0)
+      Size = 1; // Don't create zero-sized stack objects.
+
+    Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+                               ConstantInt::get(Int32Ty, -Offset));
+    Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(),
+                                      Arg->getName() + ".unsafe-byval");
+
+    // Replace the alloca with the new location.
+    replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
+                      /*Deref=*/true, -Offset);
+    Arg->replaceAllUsesWith(NewArg);
+    IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
+    IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment());
+  }
+
+  // Allocate space for every unsafe static AllocaInst on the unsafe stack.
+  for (AllocaInst *AI : StaticAllocas) {
+    IRB.SetInsertPoint(AI);
+    unsigned Offset = SSL.getObjectOffset(AI);
+
+    uint64_t Size = getStaticAllocaAllocationSize(AI);
+    if (Size == 0)
+      Size = 1; // Don't create zero-sized stack objects.
+
+    replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -Offset);
+    replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
+
+    // Replace uses of the alloca with the new location.
+    // Insert address calculation close to each use to work around PR27844.
+    std::string Name = std::string(AI->getName()) + ".unsafe";
+    while (!AI->use_empty()) {
+      Use &U = *AI->use_begin();
+      Instruction *User = cast<Instruction>(U.getUser());
+
+      Instruction *InsertBefore;
+      if (auto *PHI = dyn_cast<PHINode>(User))
+        InsertBefore = PHI->getIncomingBlock(U)->getTerminator();
+      else
+        InsertBefore = User;
+
+      IRBuilder<> IRBUser(InsertBefore);
+      Value *Off = IRBUser.CreateGEP(BasePointer, // BasePointer is i8*
+                                     ConstantInt::get(Int32Ty, -Offset));
+      Value *Replacement = IRBUser.CreateBitCast(Off, AI->getType(), Name);
+
+      if (auto *PHI = dyn_cast<PHINode>(User)) {
+        // PHI nodes may have multiple incoming edges from the same BB; all of
+        // them must be updated at once with the same incoming value.
+        auto *BB = PHI->getIncomingBlock(U);
+        for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I)
+          if (PHI->getIncomingBlock(I) == BB)
+            PHI->setIncomingValue(I, Replacement);
+      } else {
+        U.set(Replacement);
+      }
+    }
+
+    AI->eraseFromParent();
+  }
+
+  // Re-align BasePointer so that our callees see it aligned as expected.
+  // FIXME: no need to update BasePointer in leaf functions.
+  unsigned FrameSize = alignTo(SSL.getFrameSize(), StackAlignment);
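Objects in the static frame are addressed downward from the base pointer (BasePointer - Offset), and the new top of the unsafe stack is BasePointer - FrameSize, with the frame size rounded up to the stack alignment. A sketch of that arithmetic, assuming the 16-byte stack alignment used above:

#include <cstdint>

// Round Size up to a power-of-two alignment (llvm::alignTo equivalent).
static uint64_t alignToPow2(uint64_t Size, uint64_t Align) {
  return (Size + Align - 1) & ~(Align - 1);
}

// The unsafe stack grows down: with a frame of RawSize bytes, an object
// at layout offset Off lives at Base - Off, and the updated unsafe stack
// top is Base - FrameSize.
static uint64_t staticTop(uint64_t Base, uint64_t RawSize) {
  const uint64_t StackAlignment = 16;
  uint64_t FrameSize = alignToPow2(RawSize, StackAlignment);
  return Base - FrameSize;
}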
+  // Update shadow stack pointer in the function epilogue.
+  IRB.SetInsertPoint(BasePointer->getNextNode());
+
+  Value *StaticTop =
+      IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -FrameSize),
+                    "unsafe_stack_static_top");
+  IRB.CreateStore(StaticTop, UnsafeStackPtr);
+  return StaticTop;
+}
+
+void SafeStack::moveDynamicAllocasToUnsafeStack(
+    Function &F, Value *UnsafeStackPtr, AllocaInst *DynamicTop,
+    ArrayRef<AllocaInst *> DynamicAllocas) {
+  DIBuilder DIB(*F.getParent());
+
+  for (AllocaInst *AI : DynamicAllocas) {
+    IRBuilder<> IRB(AI);
+
+    // Compute the new SP value (after AI).
+    Value *ArraySize = AI->getArraySize();
+    if (ArraySize->getType() != IntPtrTy)
+      ArraySize = IRB.CreateIntCast(ArraySize, IntPtrTy, false);
+
+    Type *Ty = AI->getAllocatedType();
+    uint64_t TySize = DL->getTypeAllocSize(Ty);
+    Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize));
+
+    Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy);
+    SP = IRB.CreateSub(SP, Size);
+
+    // Align the SP value to satisfy the AllocaInst, type and stack alignments.
+    unsigned Align = std::max(
+        std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()),
+        (unsigned)StackAlignment);
+
+    assert(isPowerOf2_32(Align));
+    Value *NewTop = IRB.CreateIntToPtr(
+        IRB.CreateAnd(SP, ConstantInt::get(IntPtrTy, ~uint64_t(Align - 1))),
+        StackPtrTy);
+
+    // Save the stack pointer.
+    IRB.CreateStore(NewTop, UnsafeStackPtr);
+    if (DynamicTop)
+      IRB.CreateStore(NewTop, DynamicTop);
+
+    Value *NewAI = IRB.CreatePointerCast(NewTop, AI->getType());
+    if (AI->hasName() && isa<Instruction>(NewAI))
+      NewAI->takeName(AI);
+
+    replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true);
+    AI->replaceAllUsesWith(NewAI);
+    AI->eraseFromParent();
+  }
+
+  if (!DynamicAllocas.empty()) {
+    // Now go through the instructions again, replacing stacksave/stackrestore.
+    for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) {
+      Instruction *I = &*(It++);
+      auto II = dyn_cast<IntrinsicInst>(I);
+      if (!II)
+        continue;
+
+      if (II->getIntrinsicID() == Intrinsic::stacksave) {
+        IRBuilder<> IRB(II);
+        Instruction *LI = IRB.CreateLoad(UnsafeStackPtr);
+        LI->takeName(II);
+        II->replaceAllUsesWith(LI);
+        II->eraseFromParent();
+      } else if (II->getIntrinsicID() == Intrinsic::stackrestore) {
+        IRBuilder<> IRB(II);
+        Instruction *SI = IRB.CreateStore(II->getArgOperand(0), UnsafeStackPtr);
+        SI->takeName(II);
+        assert(II->use_empty());
+        II->eraseFromParent();
+      }
+    }
+  }
+}
+
+bool SafeStack::runOnFunction(Function &F) {
+  DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n");
+
+  if (!F.hasFnAttribute(Attribute::SafeStack)) {
+    DEBUG(dbgs() << "[SafeStack] safestack is not requested"
+                    " for this function\n");
+    return false;
+  }
+
+  if (F.isDeclaration()) {
+    DEBUG(dbgs() << "[SafeStack] function definition"
+                    " is not available\n");
+    return false;
+  }
+
+  TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr;
+  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+  ++NumFunctions;
+
+  SmallVector<AllocaInst *, 16> StaticAllocas;
+  SmallVector<AllocaInst *, 4> DynamicAllocas;
+  SmallVector<Argument *, 4> ByValArguments;
+  SmallVector<ReturnInst *, 4> Returns;
+
+  // Collect all points where the stack gets unwound and needs to be restored.
+  // This is only necessary because the runtime (setjmp and unwind code) is
+  // not aware of the unsafe stack and won't unwind/restore it properly.
+  // To work around this problem without changing the runtime, we insert
+  // instrumentation to restore the unsafe stack pointer when necessary.
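Conceptually, each such restore point gets a store that resets the unsafe stack pointer to this frame's top-of-frame value. A simplified source-level sketch of the pattern (the thread-local pointer name is the compiler-rt one mentioned above; the surrounding structure is ours, not the pass's output):

#include <csetjmp>

extern "C" __thread void *__safestack_unsafe_stack_ptr;

static jmp_buf Env;
extern void mayLongJmp();

void restorePointSketch() {
  // Unsafe stack top while this frame is active.
  void *Saved = __safestack_unsafe_stack_ptr;
  if (setjmp(Env) != 0) {
    // Reached via longjmp: the runtime restored only the safe stack,
    // so the pass plants a store like this right after the restore point.
    __safestack_unsafe_stack_ptr = Saved;
  }
  mayLongJmp();
}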
+ SmallVector<Instruction *, 4> StackRestorePoints; + + // Find all static and dynamic alloca instructions that must be moved to the + // unsafe stack, all return instructions and stack restore points. + findInsts(F, StaticAllocas, DynamicAllocas, ByValArguments, Returns, + StackRestorePoints); + + if (StaticAllocas.empty() && DynamicAllocas.empty() && + ByValArguments.empty() && StackRestorePoints.empty()) + return false; // Nothing to do in this function. + + if (!StaticAllocas.empty() || !DynamicAllocas.empty() || + !ByValArguments.empty()) + ++NumUnsafeStackFunctions; // This function has the unsafe stack. + + if (!StackRestorePoints.empty()) + ++NumUnsafeStackRestorePointsFunctions; + + IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); + UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F); + + // Load the current stack pointer (we'll also use it as a base pointer). + // FIXME: use a dedicated register for it ? + Instruction *BasePointer = + IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr"); + assert(BasePointer->getType() == StackPtrTy); + + AllocaInst *StackGuardSlot = nullptr; + // FIXME: implement weaker forms of stack protector. + if (F.hasFnAttribute(Attribute::StackProtect) || + F.hasFnAttribute(Attribute::StackProtectStrong) || + F.hasFnAttribute(Attribute::StackProtectReq)) { + Value *StackGuard = getStackGuard(IRB, F); + StackGuardSlot = IRB.CreateAlloca(StackPtrTy, nullptr); + IRB.CreateStore(StackGuard, StackGuardSlot); + + for (ReturnInst *RI : Returns) { + IRBuilder<> IRBRet(RI); + checkStackGuard(IRBRet, F, *RI, StackGuardSlot, StackGuard); + } + } + + // The top of the unsafe stack after all unsafe static allocas are + // allocated. + Value *StaticTop = + moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, ByValArguments, + Returns, BasePointer, StackGuardSlot); + + // Safe stack object that stores the current unsafe stack top. It is updated + // as unsafe dynamic (non-constant-sized) allocas are allocated and freed. + // This is only needed if we need to restore stack pointer after longjmp + // or exceptions, and we have dynamic allocations. + // FIXME: a better alternative might be to store the unsafe stack pointer + // before setjmp / invoke instructions. + AllocaInst *DynamicTop = createStackRestorePoints( + IRB, F, StackRestorePoints, StaticTop, !DynamicAllocas.empty()); + + // Handle dynamic allocas. + moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop, + DynamicAllocas); + + // Restore the unsafe stack pointer before each return. + for (ReturnInst *RI : Returns) { + IRB.SetInsertPoint(RI); + IRB.CreateStore(BasePointer, UnsafeStackPtr); + } + + DEBUG(dbgs() << "[SafeStack] safestack applied\n"); + return true; +} + +} // anonymous namespace + +char SafeStack::ID = 0; +INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack", + "Safe Stack instrumentation pass", false, false) +INITIALIZE_TM_PASS_END(SafeStack, "safe-stack", + "Safe Stack instrumentation pass", false, false) + +FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) { + return new SafeStack(TM); +} diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp new file mode 100644 index 0000000..795eb8d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp @@ -0,0 +1,291 @@ +//===-- SafeStackColoring.cpp - SafeStack frame coloring -------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SafeStackColoring.h" + +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; +using namespace llvm::safestack; + +#define DEBUG_TYPE "safestackcoloring" + +static cl::opt<bool> ClColoring("safe-stack-coloring", + cl::desc("enable safe stack coloring"), + cl::Hidden, cl::init(true)); + +const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) { + const auto IT = AllocaNumbering.find(AI); + assert(IT != AllocaNumbering.end()); + return LiveRanges[IT->second]; +} + +bool StackColoring::readMarker(Instruction *I, bool *IsStart) { + auto *II = dyn_cast<IntrinsicInst>(I); + if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end)) + return false; + + *IsStart = II->getIntrinsicID() == Intrinsic::lifetime_start; + return true; +} + +void StackColoring::removeAllMarkers() { + for (auto *I : Markers) { + auto *Op = dyn_cast<Instruction>(I->getOperand(1)); + I->eraseFromParent(); + // Remove the operand bitcast, too, if it has no more uses left. + if (Op && Op->use_empty()) + Op->eraseFromParent(); + } +} + +void StackColoring::collectMarkers() { + InterestingAllocas.resize(NumAllocas); + DenseMap<BasicBlock *, SmallDenseMap<Instruction *, Marker>> BBMarkerSet; + + // Compute the set of start/end markers per basic block. + for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) { + AllocaInst *AI = Allocas[AllocaNo]; + SmallVector<Instruction *, 8> WorkList; + WorkList.push_back(AI); + while (!WorkList.empty()) { + Instruction *I = WorkList.pop_back_val(); + for (User *U : I->users()) { + if (auto *BI = dyn_cast<BitCastInst>(U)) { + WorkList.push_back(BI); + continue; + } + auto *UI = dyn_cast<Instruction>(U); + if (!UI) + continue; + bool IsStart; + if (!readMarker(UI, &IsStart)) + continue; + if (IsStart) + InterestingAllocas.set(AllocaNo); + BBMarkerSet[UI->getParent()][UI] = {AllocaNo, IsStart}; + Markers.push_back(UI); + } + } + } + + // Compute instruction numbering. Only the following instructions are + // considered: + // * Basic block entries + // * Lifetime markers + // For each basic block, compute + // * the list of markers in the instruction order + // * the sets of allocas whose lifetime starts or ends in this BB + DEBUG(dbgs() << "Instructions:\n"); + unsigned InstNo = 0; + for (BasicBlock *BB : depth_first(&F)) { + DEBUG(dbgs() << " " << InstNo << ": BB " << BB->getName() << "\n"); + unsigned BBStart = InstNo++; + + BlockLifetimeInfo &BlockInfo = BlockLiveness[BB]; + BlockInfo.Begin.resize(NumAllocas); + BlockInfo.End.resize(NumAllocas); + BlockInfo.LiveIn.resize(NumAllocas); + BlockInfo.LiveOut.resize(NumAllocas); + + auto &BlockMarkerSet = BBMarkerSet[BB]; + if (BlockMarkerSet.empty()) { + unsigned BBEnd = InstNo; + BlockInstRange[BB] = std::make_pair(BBStart, BBEnd); + continue; + } + + auto ProcessMarker = [&](Instruction *I, const Marker &M) { + DEBUG(dbgs() << " " << InstNo << ": " + << (M.IsStart ? 
"start " : "end ") << M.AllocaNo << ", " + << *I << "\n"); + + BBMarkers[BB].push_back({InstNo, M}); + + InstructionNumbering[I] = InstNo++; + + if (M.IsStart) { + if (BlockInfo.End.test(M.AllocaNo)) + BlockInfo.End.reset(M.AllocaNo); + BlockInfo.Begin.set(M.AllocaNo); + } else { + if (BlockInfo.Begin.test(M.AllocaNo)) + BlockInfo.Begin.reset(M.AllocaNo); + BlockInfo.End.set(M.AllocaNo); + } + }; + + if (BlockMarkerSet.size() == 1) { + ProcessMarker(BlockMarkerSet.begin()->getFirst(), + BlockMarkerSet.begin()->getSecond()); + } else { + // Scan the BB to determine the marker order. + for (Instruction &I : *BB) { + auto It = BlockMarkerSet.find(&I); + if (It == BlockMarkerSet.end()) + continue; + ProcessMarker(&I, It->getSecond()); + } + } + + unsigned BBEnd = InstNo; + BlockInstRange[BB] = std::make_pair(BBStart, BBEnd); + } + NumInst = InstNo; +} + +void StackColoring::calculateLocalLiveness() { + bool changed = true; + while (changed) { + changed = false; + + for (BasicBlock *BB : depth_first(&F)) { + BlockLifetimeInfo &BlockInfo = BlockLiveness[BB]; + + // Compute LiveIn by unioning together the LiveOut sets of all preds. + BitVector LocalLiveIn; + for (auto *PredBB : predecessors(BB)) { + LivenessMap::const_iterator I = BlockLiveness.find(PredBB); + assert(I != BlockLiveness.end() && "Predecessor not found"); + LocalLiveIn |= I->second.LiveOut; + } + + // Compute LiveOut by subtracting out lifetimes that end in this + // block, then adding in lifetimes that begin in this block. If + // we have both BEGIN and END markers in the same basic block + // then we know that the BEGIN marker comes after the END, + // because we already handle the case where the BEGIN comes + // before the END when collecting the markers (and building the + // BEGIN/END vectors). + BitVector LocalLiveOut = LocalLiveIn; + LocalLiveOut.reset(BlockInfo.End); + LocalLiveOut |= BlockInfo.Begin; + + // Update block LiveIn set, noting whether it has changed. + if (LocalLiveIn.test(BlockInfo.LiveIn)) { + changed = true; + BlockInfo.LiveIn |= LocalLiveIn; + } + + // Update block LiveOut set, noting whether it has changed. + if (LocalLiveOut.test(BlockInfo.LiveOut)) { + changed = true; + BlockInfo.LiveOut |= LocalLiveOut; + } + } + } // while changed. +} + +void StackColoring::calculateLiveIntervals() { + for (auto IT : BlockLiveness) { + BasicBlock *BB = IT.getFirst(); + BlockLifetimeInfo &BlockInfo = IT.getSecond(); + unsigned BBStart, BBEnd; + std::tie(BBStart, BBEnd) = BlockInstRange[BB]; + + BitVector Started, Ended; + Started.resize(NumAllocas); + Ended.resize(NumAllocas); + SmallVector<unsigned, 8> Start; + Start.resize(NumAllocas); + + // LiveIn ranges start at the first instruction. 
+ for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) { + if (BlockInfo.LiveIn.test(AllocaNo)) { + Started.set(AllocaNo); + Start[AllocaNo] = BBStart; + } + } + + for (auto &It : BBMarkers[BB]) { + unsigned InstNo = It.first; + bool IsStart = It.second.IsStart; + unsigned AllocaNo = It.second.AllocaNo; + + if (IsStart) { + assert(!Started.test(AllocaNo)); + Started.set(AllocaNo); + Ended.reset(AllocaNo); + Start[AllocaNo] = InstNo; + } else { + assert(!Ended.test(AllocaNo)); + if (Started.test(AllocaNo)) { + LiveRanges[AllocaNo].AddRange(Start[AllocaNo], InstNo); + Started.reset(AllocaNo); + } + Ended.set(AllocaNo); + } + } + + for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) + if (Started.test(AllocaNo)) + LiveRanges[AllocaNo].AddRange(Start[AllocaNo], BBEnd); + } +} + +LLVM_DUMP_METHOD void StackColoring::dumpAllocas() { + dbgs() << "Allocas:\n"; + for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) + dbgs() << " " << AllocaNo << ": " << *Allocas[AllocaNo] << "\n"; +} + +LLVM_DUMP_METHOD void StackColoring::dumpBlockLiveness() { + dbgs() << "Block liveness:\n"; + for (auto IT : BlockLiveness) { + BasicBlock *BB = IT.getFirst(); + BlockLifetimeInfo &BlockInfo = BlockLiveness[BB]; + auto BlockRange = BlockInstRange[BB]; + dbgs() << " BB [" << BlockRange.first << ", " << BlockRange.second + << "): begin " << BlockInfo.Begin << ", end " << BlockInfo.End + << ", livein " << BlockInfo.LiveIn << ", liveout " + << BlockInfo.LiveOut << "\n"; + } +} + +LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() { + dbgs() << "Alloca liveness:\n"; + for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo) { + LiveRange &Range = LiveRanges[AllocaNo]; + dbgs() << " " << AllocaNo << ": " << Range << "\n"; + } +} + +void StackColoring::run() { + DEBUG(dumpAllocas()); + + for (unsigned I = 0; I < NumAllocas; ++I) + AllocaNumbering[Allocas[I]] = I; + LiveRanges.resize(NumAllocas); + + collectMarkers(); + + if (!ClColoring) { + for (auto &R : LiveRanges) { + R.SetMaximum(1); + R.AddRange(0, 1); + } + return; + } + + for (auto &R : LiveRanges) + R.SetMaximum(NumInst); + for (unsigned I = 0; I < NumAllocas; ++I) + if (!InterestingAllocas.test(I)) + LiveRanges[I] = getFullLiveRange(); + + calculateLocalLiveness(); + DEBUG(dumpBlockLiveness()); + calculateLiveIntervals(); + DEBUG(dumpLiveRanges()); +} diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.h b/contrib/llvm/lib/CodeGen/SafeStackColoring.h new file mode 100644 index 0000000..08b179c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.h @@ -0,0 +1,149 @@ +//===-- SafeStackColoring.h - SafeStack frame coloring ---------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H +#define LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/raw_os_ostream.h" + +namespace llvm { +class AllocaInst; + +namespace safestack { +/// Compute live ranges of allocas. +/// Live ranges are represented as sets of "interesting" instructions, which are +/// defined as instructions that may start or end an alloca's lifetime. 
+/// These are:
+///  * lifetime.start and lifetime.end intrinsics
+///  * the first instruction of any basic block
+/// Interesting instructions are numbered in the depth-first walk of the CFG,
+/// and in the program order inside each basic block.
+class StackColoring {
+  /// A class representing liveness information for a single basic block.
+  /// Each bit in the BitVector represents the liveness property
+  /// for a different stack slot.
+  struct BlockLifetimeInfo {
+    /// Which slots BEGIN in each basic block.
+    BitVector Begin;
+    /// Which slots END in each basic block.
+    BitVector End;
+    /// Which slots are marked as LIVE_IN, coming into each basic block.
+    BitVector LiveIn;
+    /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+    BitVector LiveOut;
+  };
+
+public:
+  /// This class represents a set of interesting instructions where an
+  /// alloca is live.
+  struct LiveRange {
+    BitVector bv;
+    void SetMaximum(int size) { bv.resize(size); }
+    void AddRange(unsigned start, unsigned end) { bv.set(start, end); }
+    bool Overlaps(const LiveRange &Other) const {
+      return bv.anyCommon(Other.bv);
+    }
+    void Join(const LiveRange &Other) { bv |= Other.bv; }
+  };
+
+private:
+  Function &F;
+
+  /// Maps active slots (per bit) for each basic block.
+  typedef DenseMap<BasicBlock *, BlockLifetimeInfo> LivenessMap;
+  LivenessMap BlockLiveness;
+
+  /// Number of interesting instructions.
+  int NumInst;
+  /// Numeric ids for interesting instructions.
+  DenseMap<Instruction *, unsigned> InstructionNumbering;
+  /// A range [Start, End) of instruction ids for each basic block.
+  /// Instructions inside each BB have monotonic and consecutive ids.
+  DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
+
+  ArrayRef<AllocaInst *> Allocas;
+  unsigned NumAllocas;
+  DenseMap<AllocaInst *, unsigned> AllocaNumbering;
+  /// LiveRange for allocas.
+  SmallVector<LiveRange, 8> LiveRanges;
+
+  /// The set of allocas that have at least one lifetime.start. All other
+  /// allocas get a LiveRange that corresponds to the entire function.
+  BitVector InterestingAllocas;
+  SmallVector<Instruction *, 8> Markers;
+
+  struct Marker {
+    unsigned AllocaNo;
+    bool IsStart;
+  };
+
+  /// List of {InstNo, {AllocaNo, IsStart}} for each BB, ordered by InstNo.
+  DenseMap<BasicBlock *, SmallVector<std::pair<unsigned, Marker>, 4>> BBMarkers;
+
+  void dumpAllocas();
+  void dumpBlockLiveness();
+  void dumpLiveRanges();
+
+  bool readMarker(Instruction *I, bool *IsStart);
+  void collectMarkers();
+  void calculateLocalLiveness();
+  void calculateLiveIntervals();
+
+public:
+  StackColoring(Function &F, ArrayRef<AllocaInst *> Allocas)
+      : F(F), NumInst(-1), Allocas(Allocas), NumAllocas(Allocas.size()) {}
+
+  void run();
+  void removeAllMarkers();
+
+  /// Returns a set of "interesting" instructions where the given alloca is
+  /// live. Not all instructions in a function are interesting: we pick a set
+  /// that is large enough for LiveRange::Overlaps to be correct.
+  const LiveRange &getLiveRange(AllocaInst *AI);
+
+  /// Returns a live range that represents an alloca that is live throughout
+  /// the entire function.
+ LiveRange getFullLiveRange() { + assert(NumInst >= 0); + LiveRange R; + R.SetMaximum(NumInst); + R.AddRange(0, NumInst); + return R; + } +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const BitVector &V) { + OS << "{"; + int idx = V.find_first(); + bool first = true; + while (idx >= 0) { + if (!first) { + OS << ", "; + } + first = false; + OS << idx; + idx = V.find_next(idx); + } + OS << "}"; + return OS; +} + +static inline raw_ostream &operator<<(raw_ostream &OS, + const StackColoring::LiveRange &R) { + return OS << R.bv; +} + +} // namespace safestack +} // namespace llvm + +#endif // LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp new file mode 100644 index 0000000..fb433c1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp @@ -0,0 +1,139 @@ +//===-- SafeStackLayout.cpp - SafeStack frame layout -----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SafeStackLayout.h" + +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; +using namespace llvm::safestack; + +#define DEBUG_TYPE "safestacklayout" + +static cl::opt<bool> ClLayout("safe-stack-layout", + cl::desc("enable safe stack layout"), cl::Hidden, + cl::init(true)); + +LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) { + OS << "Stack regions:\n"; + for (unsigned i = 0; i < Regions.size(); ++i) { + OS << " " << i << ": [" << Regions[i].Start << ", " << Regions[i].End + << "), range " << Regions[i].Range << "\n"; + } + OS << "Stack objects:\n"; + for (auto &IT : ObjectOffsets) { + OS << " at " << IT.getSecond() << ": " << *IT.getFirst() << "\n"; + } +} + +void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment, + const StackColoring::LiveRange &Range) { + StackObjects.push_back({V, Size, Alignment, Range}); + MaxAlignment = std::max(MaxAlignment, Alignment); +} + +static unsigned AdjustStackOffset(unsigned Offset, unsigned Size, + unsigned Alignment) { + return alignTo(Offset + Size, Alignment) - Size; +} + +void StackLayout::layoutObject(StackObject &Obj) { + if (!ClLayout) { + // If layout is disabled, just grab the next aligned address. + // This effectively disables stack coloring as well. + unsigned LastRegionEnd = Regions.empty() ? 0 : Regions.back().End; + unsigned Start = AdjustStackOffset(LastRegionEnd, Obj.Size, Obj.Alignment); + unsigned End = Start + Obj.Size; + Regions.emplace_back(Start, End, Obj.Range); + ObjectOffsets[Obj.Handle] = End; + return; + } + + DEBUG(dbgs() << "Layout: size " << Obj.Size << ", align " << Obj.Alignment + << ", range " << Obj.Range << "\n"); + assert(Obj.Alignment <= MaxAlignment); + unsigned Start = AdjustStackOffset(0, Obj.Size, Obj.Alignment); + unsigned End = Start + Obj.Size; + DEBUG(dbgs() << " First candidate: " << Start << " .. " << End << "\n"); + for (const StackRegion &R : Regions) { + DEBUG(dbgs() << " Examining region: " << R.Start << " .. " << R.End + << ", range " << R.Range << "\n"); + assert(End >= R.Start); + if (Start >= R.End) { + DEBUG(dbgs() << " Does not intersect, skip.\n"); + continue; + } + if (Obj.Range.Overlaps(R.Range)) { + // Find the next appropriate location. 
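// A worked example (editor's note) of the offset arithmetic used in this
// loop; AdjustStackOffset is the helper defined above:
//
//   AdjustStackOffset(/*Offset=*/13, /*Size=*/8, /*Alignment=*/16)
//     == alignTo(13 + 8, 16) - 8 == 32 - 8 == 24   // object spans [24, 32)
//
// It is the object's *end* offset that gets aligned: offsets here are
// distances from the top of the unsafe-stack frame, and since that stack
// grows down, an aligned End presumably corresponds to an aligned object
// address.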
+ Start = AdjustStackOffset(R.End, Obj.Size, Obj.Alignment); + End = Start + Obj.Size; + DEBUG(dbgs() << " Overlaps. Next candidate: " << Start << " .. " << End + << "\n"); + continue; + } + if (End <= R.End) { + DEBUG(dbgs() << " Reusing region(s).\n"); + break; + } + } + + unsigned LastRegionEnd = Regions.empty() ? 0 : Regions.back().End; + if (End > LastRegionEnd) { + // Insert a new region at the end. Maybe two. + if (Start > LastRegionEnd) { + DEBUG(dbgs() << " Creating gap region: " << LastRegionEnd << " .. " + << Start << "\n"); + Regions.emplace_back(LastRegionEnd, Start, StackColoring::LiveRange()); + LastRegionEnd = Start; + } + DEBUG(dbgs() << " Creating new region: " << LastRegionEnd << " .. " << End + << ", range " << Obj.Range << "\n"); + Regions.emplace_back(LastRegionEnd, End, Obj.Range); + LastRegionEnd = End; + } + + // Split starting and ending regions if necessary. + for (unsigned i = 0; i < Regions.size(); ++i) { + StackRegion &R = Regions[i]; + if (Start > R.Start && Start < R.End) { + StackRegion R0 = R; + R.Start = R0.End = Start; + Regions.insert(&R, R0); + continue; + } + if (End > R.Start && End < R.End) { + StackRegion R0 = R; + R0.End = R.Start = End; + Regions.insert(&R, R0); + break; + } + } + + // Update live ranges for all affected regions. + for (StackRegion &R : Regions) { + if (Start < R.End && End > R.Start) + R.Range.Join(Obj.Range); + if (End <= R.End) + break; + } + + ObjectOffsets[Obj.Handle] = End; +} + +void StackLayout::computeLayout() { + // Simple greedy algorithm. + // If this is replaced with something smarter, it must preserve the property + // that the first object is always at the offset 0 in the stack frame (for + // StackProtectorSlot), or handle stack protector in some other way. + for (auto &Obj : StackObjects) + layoutObject(Obj); + + DEBUG(print(dbgs())); +} diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.h b/contrib/llvm/lib/CodeGen/SafeStackLayout.h new file mode 100644 index 0000000..313ed21 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.h @@ -0,0 +1,68 @@ +//===-- SafeStackLayout.h - SafeStack frame layout -------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H +#define LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H + +#include "SafeStackColoring.h" + +namespace llvm { +namespace safestack { + +/// Compute the layout of an unsafe stack frame. +class StackLayout { + unsigned MaxAlignment; + + struct StackRegion { + unsigned Start; + unsigned End; + StackColoring::LiveRange Range; + StackRegion(unsigned Start, unsigned End, + const StackColoring::LiveRange &Range) + : Start(Start), End(End), Range(Range) {} + }; + /// The list of current stack regions, sorted by StackRegion::Start. + SmallVector<StackRegion, 16> Regions; + + struct StackObject { + const Value *Handle; + unsigned Size, Alignment; + StackColoring::LiveRange Range; + }; + SmallVector<StackObject, 8> StackObjects; + + DenseMap<const Value *, unsigned> ObjectOffsets; + + void layoutObject(StackObject &Obj); + +public: + StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {} + /// Add an object to the stack frame. Value pointer is opaque and used as a + /// handle to retrieve the object's offset in the frame later. 
+ void addObject(const Value *V, unsigned Size, unsigned Alignment, + const StackColoring::LiveRange &Range); + + /// Run the layout computation for all previously added objects. + void computeLayout(); + + /// Returns the offset to the object start in the stack frame. + unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; } + + /// Returns the size of the entire frame. + unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; } + + /// Returns the alignment of the frame. + unsigned getFrameAlignment() { return MaxAlignment; } + void print(raw_ostream &OS); +}; + +} // namespace safestack +} // namespace llvm + +#endif // LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 11b246a..22bfd4d 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -14,11 +14,11 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/ADT/IntEqClasses.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -27,6 +27,8 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDFS.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/IR/Operator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -36,7 +38,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include <queue> using namespace llvm; @@ -49,12 +50,51 @@ static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction")); +// Note: the two options below might be used in tuning compile time vs +// output quality. Setting HugeRegion so large that it will never be +// reached means best-effort, but may be slow. + +// When Stores and Loads maps (or NonAliasStores and NonAliasLoads) +// together hold this many SUs, a reduction of maps will be done. +static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden, + cl::init(1000), cl::desc("The limit to use while constructing the DAG " + "prior to scheduling, at which point a trade-off " + "is made to avoid excessive compile time.")); + +static cl::opt<unsigned> ReductionSize( + "dag-maps-reduction-size", cl::Hidden, + cl::desc("A huge scheduling region will have maps reduced by this many " + "nodes at a time. Defaults to HugeRegion / 2.")); + +static unsigned getReductionSize() { + // Always reduce a huge region with half of the elements, except + // when user sets this number explicitly. 
+ if (ReductionSize.getNumOccurrences() == 0) + return HugeRegion / 2; + return ReductionSize; +} + +static void dumpSUList(ScheduleDAGInstrs::SUList &L) { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dbgs() << "{ "; + for (auto *su : L) { + dbgs() << "SU(" << su->NodeNum << ")"; + if (su != L.back()) + dbgs() << ", "; + } + dbgs() << "}\n"; +#endif +} + ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags) : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), - TrackLaneMasks(false), FirstDbgValue(nullptr) { + TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr), + UnknownValue(UndefValue::get( + Type::getVoidTy(mf.getFunction()->getContext()))), + FirstDbgValue(nullptr) { DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); @@ -120,10 +160,6 @@ static void getUnderlyingObjects(const Value *V, } while (!Working.empty()); } -typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType; -typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4> -UnderlyingObjectsVector; - /// getUnderlyingObjectsForInstr - If this machine instr has memory reference /// information and it can be tracked to a normal reference to a known /// object, return the Value for that object. @@ -131,46 +167,46 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo *MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL) { - if (!MI->hasOneMemOperand() || - (!(*MI->memoperands_begin())->getValue() && - !(*MI->memoperands_begin())->getPseudoValue()) || - (*MI->memoperands_begin())->isVolatile()) - return; - - if (const PseudoSourceValue *PSV = - (*MI->memoperands_begin())->getPseudoValue()) { - // Function that contain tail calls don't have unique PseudoSourceValue - // objects. Two PseudoSourceValues might refer to the same or overlapping - // locations. The client code calling this function assumes this is not the - // case. So return a conservative answer of no known object. - if (MFI->hasTailCall()) - return; + auto allMMOsOkay = [&]() { + for (const MachineMemOperand *MMO : MI->memoperands()) { + if (MMO->isVolatile()) + return false; + + if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) { + // Function that contain tail calls don't have unique PseudoSourceValue + // objects. Two PseudoSourceValues might refer to the same or + // overlapping locations. The client code calling this function assumes + // this is not the case. So return a conservative answer of no known + // object. + if (MFI->hasTailCall()) + return false; - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. - if (!PSV->isAliased(MFI)) { - bool MayAlias = PSV->mayAlias(MFI); - Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); - } - return; - } + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. 
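// Editor's note on the all-or-nothing analysis in this lambda: a single
// unanalyzable memory operand makes allMMOsOkay() return false, which
// clears Objects entirely and forces fully conservative chaining. For the
// IR-value path below, "analyzable" roughly means isIdentifiedObject()
// holds for every underlying object, e.g.:
//
//   %buf = alloca [16 x i8]     ; identified -> precise tracking
//   @g = global i32 0           ; identified
//   %p = load i8*, i8** %pp     ; NOT identified -> bail out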
+ if (PSV->isAliased(MFI)) + return false; - const Value *V = (*MI->memoperands_begin())->getValue(); - if (!V) - return; + bool MayAlias = PSV->mayAlias(MFI); + Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); + } else if (const Value *V = MMO->getValue()) { + SmallVector<Value *, 4> Objs; + getUnderlyingObjects(V, Objs, DL); - SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs, DL); + for (Value *V : Objs) { + if (!isIdentifiedObject(V)) + return false; - for (Value *V : Objs) { - if (!isIdentifiedObject(V)) { - Objects.clear(); - return; + Objects.push_back(UnderlyingObjectsVector::value_type(V, true)); + } + } else + return false; } + return true; + }; - Objects.push_back(UnderlyingObjectsVector::value_type(V, true)); - } + if (!allMMOsOkay()) + Objects.clear(); } void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { @@ -475,10 +511,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { // VReg2SUnit for the non-overlapping part. LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask; LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; - if (NonOverlapMask != 0) - CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU)); V2SU.SU = SU; V2SU.LaneMask = OverlapMask; + if (NonOverlapMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU)); } // If there was no CurrentVRegDefs entry for some lanes yet, create one. if (LaneMask != 0) @@ -518,84 +554,32 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// (like a call or something with unmodeled side effects). static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { return MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasOrderedMemoryRef() && - (!MI->mayLoad() || !MI->isInvariantLoad(AA))); -} - -// This MI might have either incomplete info, or known to be unsafe -// to deal with (i.e. volatile object). -static inline bool isUnsafeMemoryObject(MachineInstr *MI, - const MachineFrameInfo *MFI, - const DataLayout &DL) { - if (!MI || MI->memoperands_empty()) - return true; - // We purposefully do no check for hasOneMemOperand() here - // in hope to trigger an assert downstream in order to - // finish implementation. - if ((*MI->memoperands_begin())->isVolatile() || - MI->hasUnmodeledSideEffects()) - return true; - - if ((*MI->memoperands_begin())->getPseudoValue()) { - // Similarly to getUnderlyingObjectForInstr: - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. - return true; - } - - const Value *V = (*MI->memoperands_begin())->getValue(); - if (!V) - return true; - - SmallVector<Value *, 4> Objs; - getUnderlyingObjects(V, Objs, DL); - for (Value *V : Objs) { - // Does this pointer refer to a distinct and identifiable object? - if (!isIdentifiedObject(V)) - return true; - } - - return false; + (MI->hasOrderedMemoryRef() && !MI->isInvariantLoad(AA)); } /// This returns true if the two MIs need a chain edge between them. -/// If these are not even memory operations, we still may need -/// chain deps between them. The question really is - could -/// these two MIs be reordered during scheduling from memory dependency -/// point of view. +/// This is called on normal stores and loads. 
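// A minimal sketch (editor's example) of the query this reduces to when
// both instructions carry one analyzable memory operand; the actual code
// below additionally folds the operands' offsets into the overlap sizes:
//
//   MachineMemOperand *MMOa = *MIa->memoperands_begin();
//   MachineMemOperand *MMOb = *MIb->memoperands_begin();
//   return AA->alias(
//              MemoryLocation(MMOa->getValue(), MMOa->getSize(),
//                             MMOa->getAAInfo()),
//              MemoryLocation(MMOb->getValue(), MMOb->getSize(),
//                             MMOb->getAAInfo())) != NoAlias;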
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, const DataLayout &DL, MachineInstr *MIa, MachineInstr *MIb) { const MachineFunction *MF = MIa->getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - // Cover a trivial case - no edge is need to itself. - if (MIa == MIb) - return false; - - // Let the target decide if memory accesses cannot possibly overlap. - if ((MIa->mayLoad() || MIa->mayStore()) && - (MIb->mayLoad() || MIb->mayStore())) - if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA)) - return false; + assert ((MIa->mayStore() || MIb->mayStore()) && + "Dependency checked between two loads"); - // FIXME: Need to handle multiple memory operands to support all targets. - if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) - return true; - - if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL)) - return true; - - // If we are dealing with two "normal" loads, we do not need an edge - // between them - they could be reordered. - if (!MIa->mayStore() && !MIb->mayStore()) + // Let the target decide if memory accesses cannot possibly overlap. + if (TII->areMemAccessesTriviallyDisjoint(*MIa, *MIb, AA)) return false; // To this point analysis is generic. From here on we do need AA. if (!AA) return true; + // FIXME: Need to handle multiple memory operands to support all targets. + if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) + return true; + MachineMemOperand *MMOa = *MIa->memoperands_begin(); MachineMemOperand *MMOb = *MIb->memoperands_begin(); @@ -634,106 +618,15 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, return (AAResult != NoAlias); } -/// This recursive function iterates over chain deps of SUb looking for -/// "latest" node that needs a chain edge to SUa. -static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, - const DataLayout &DL, SUnit *SUa, SUnit *SUb, - SUnit *ExitSU, unsigned *Depth, - SmallPtrSetImpl<const SUnit *> &Visited) { - if (!SUa || !SUb || SUb == ExitSU) - return *Depth; - - // Remember visited nodes. - if (!Visited.insert(SUb).second) - return *Depth; - // If there is _some_ dependency already in place, do not - // descend any further. - // TODO: Need to make sure that if that dependency got eliminated or ignored - // for any reason in the future, we would not violate DAG topology. - // Currently it does not happen, but makes an implicit assumption about - // future implementation. - // - // Independently, if we encounter node that is some sort of global - // object (like a call) we already have full set of dependencies to it - // and we can stop descending. - if (SUa->isSucc(SUb) || - isGlobalMemoryObject(AA, SUb->getInstr())) - return *Depth; - - // If we do need an edge, or we have exceeded depth budget, - // add that edge to the predecessors chain of SUb, - // and stop descending. - if (*Depth > 200 || - MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { - SUb->addPred(SDep(SUa, SDep::MayAliasMem)); - return *Depth; - } - // Track current depth. - (*Depth)++; - // Iterate over memory dependencies only. - for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); - I != E; ++I) - if (I->isNormalMemoryOrBarrier()) - iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited); - return *Depth; -} - -/// This function assumes that "downward" from SU there exist -/// tail/leaf of already constructed DAG. 
It iterates downward and -/// checks whether SU can be aliasing any node dominated -/// by it. -static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, - const DataLayout &DL, SUnit *SU, SUnit *ExitSU, - std::set<SUnit *> &CheckList, - unsigned LatencyToLoad) { - if (!SU) - return; - - SmallPtrSet<const SUnit*, 16> Visited; - unsigned Depth = 0; - - for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end(); - I != IE; ++I) { - if (SU == *I) - continue; - if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) { - SDep Dep(SU, SDep::MayAliasMem); - Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0); - (*I)->addPred(Dep); - } - - // Iterate recursively over all previously added memory chain - // successors. Keep track of visited nodes. - for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), - JE = (*I)->Succs.end(); J != JE; ++J) - if (J->isNormalMemoryOrBarrier()) - iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth, - Visited); - } -} - -/// Check whether two objects need a chain edge, if so, add it -/// otherwise remember the rejected SU. -static inline void addChainDependency(AliasAnalysis *AA, - const MachineFrameInfo *MFI, - const DataLayout &DL, SUnit *SUa, - SUnit *SUb, std::set<SUnit *> &RejectList, - unsigned TrueMemOrderLatency = 0, - bool isNormalMemory = false) { - // If this is a false dependency, - // do not add the edge, but remember the rejected node. - if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { - SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); - Dep.setLatency(TrueMemOrderLatency); +/// Check whether two objects need a chain edge and add it if needed. +void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb, + unsigned Latency) { + if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(), + SUb->getInstr())) { + SDep Dep(SUa, SDep::MayAliasMem); + Dep.setLatency(Latency); SUb->addPred(Dep); } - else { - // Duplicate entries should be ignored. - RejectList.insert(SUb); - DEBUG(dbgs() << "\tReject chain dep between SU(" - << SUa->NodeNum << ") and SU(" - << SUb->NodeNum << ")\n"); - } } /// Create an SUnit for each real instruction, numbered in top-down topological @@ -752,16 +645,15 @@ void ScheduleDAGInstrs::initSUnits() { // which is contained within a basic block. SUnits.reserve(NumRegionInstrs); - for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { - MachineInstr *MI = I; - if (MI->isDebugValue()) + for (MachineInstr &MI : llvm::make_range(RegionBegin, RegionEnd)) { + if (MI.isDebugValue()) continue; - SUnit *SU = newSUnit(MI); - MISUnitMap[MI] = SU; + SUnit *SU = newSUnit(&MI); + MISUnitMap[&MI] = SU; - SU->isCall = MI->isCall(); - SU->isCommutable = MI->isCommutable(); + SU->isCall = MI.isCall(); + SU->isCommutable = MI.isCommutable(); // Assign the Latency field of SU using target-provided information. SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); @@ -808,6 +700,19 @@ void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + // Ignore re-defs. + if (TrackLaneMasks) { + bool FoundDef = false; + for (const MachineOperand &MO2 : MI->operands()) { + if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) { + FoundDef = true; + break; + } + } + if (FoundDef) + continue; + } + // Record this local VReg use. 
VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); for (; UI != VRegUses.end(); ++UI) { @@ -819,17 +724,136 @@ void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { } } +class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> { + + /// Current total number of SUs in map. + unsigned NumNodes; + + /// 1 for loads, 0 for stores. (see comment in SUList) + unsigned TrueMemOrderLatency; +public: + + Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {} + + /// To keep NumNodes up to date, insert() is used instead of + /// this operator w/ push_back(). + ValueType &operator[](const SUList &Key) { + llvm_unreachable("Don't use. Use insert() instead."); }; + + /// Add SU to the SUList of V. If Map grows huge, reduce its size + /// by calling reduce(). + void inline insert(SUnit *SU, ValueType V) { + MapVector::operator[](V).push_back(SU); + NumNodes++; + } + + /// Clears the list of SUs mapped to V. + void inline clearList(ValueType V) { + iterator Itr = find(V); + if (Itr != end()) { + assert (NumNodes >= Itr->second.size()); + NumNodes -= Itr->second.size(); + + Itr->second.clear(); + } + } + + /// Clears map from all contents. + void clear() { + MapVector<ValueType, SUList>::clear(); + NumNodes = 0; + } + + unsigned inline size() const { return NumNodes; } + + /// Count the number of SUs in this map after a reduction. + void reComputeSize(void) { + NumNodes = 0; + for (auto &I : *this) + NumNodes += I.second.size(); + } + + unsigned inline getTrueMemOrderLatency() const { + return TrueMemOrderLatency; + } + + void dump(); +}; + +void ScheduleDAGInstrs::addChainDependencies(SUnit *SU, + Value2SUsMap &Val2SUsMap) { + for (auto &I : Val2SUsMap) + addChainDependencies(SU, I.second, + Val2SUsMap.getTrueMemOrderLatency()); +} + +void ScheduleDAGInstrs::addChainDependencies(SUnit *SU, + Value2SUsMap &Val2SUsMap, + ValueType V) { + Value2SUsMap::iterator Itr = Val2SUsMap.find(V); + if (Itr != Val2SUsMap.end()) + addChainDependencies(SU, Itr->second, + Val2SUsMap.getTrueMemOrderLatency()); +} + +void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) { + assert (BarrierChain != nullptr); + + for (auto &I : map) { + SUList &sus = I.second; + for (auto *SU : sus) + SU->addPredBarrier(BarrierChain); + } + map.clear(); +} + +void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) { + assert (BarrierChain != nullptr); + + // Go through all lists of SUs. + for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) { + Value2SUsMap::iterator CurrItr = I++; + SUList &sus = CurrItr->second; + SUList::iterator SUItr = sus.begin(), SUEE = sus.end(); + for (; SUItr != SUEE; ++SUItr) { + // Stop on BarrierChain or any instruction above it. + if ((*SUItr)->NodeNum <= BarrierChain->NodeNum) + break; + + (*SUItr)->addPredBarrier(BarrierChain); + } + + // Remove also the BarrierChain from list if present. + if (SUItr != SUEE && *SUItr == BarrierChain) + SUItr++; + + // Remove all SUs that are now successors of BarrierChain. + if (SUItr != sus.begin()) + sus.erase(sus.begin(), SUItr); + } + + // Remove all entries with empty su lists. + map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) { + return (mapEntry.second.empty()); }); + + // Recompute the size of the map (NumNodes). + map.reComputeSize(); +} + /// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. 
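// Example (editor's sketch) of the Value2SUsMap bookkeeping defined above.
// Note that size() reports the total number of mapped SUs rather than the
// number of keys, since that is the quantity compared against HugeRegion:
//
//   Value2SUsMap Stores;          // TrueMemOrderLatency == 0
//   Stores.insert(SU0, V);        // list for V: {SU0}       size() == 1
//   Stores.insert(SU1, V);        // list for V: {SU0, SU1}  size() == 2
//   Stores.insert(SU2, W);        // two keys now            size() == 3
//   Stores.clearList(V);          // drops SU0 and SU1       size() == 1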
 void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                         RegPressureTracker *RPTracker,
                                         PressureDiffs *PDiffs,
+                                        LiveIntervals *LIS,
                                         bool TrackLaneMasks) {
   const TargetSubtargetInfo &ST = MF.getSubtarget();
   bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                        : ST.useAA();
-  AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
+  AAForDep = UseAA ? AA : nullptr;
+
+  BarrierChain = nullptr;
 
   this->TrackLaneMasks = TrackLaneMasks;
   MISUnitMap.clear();
@@ -841,19 +865,25 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
   if (PDiffs)
     PDiffs->init(SUnits.size());
 
-  // We build scheduling units by walking a block's instruction list from bottom
-  // to top.
-
-  // Remember where a generic side-effecting instruction is as we proceed.
-  SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
-
-  // Memory references to specific known memory locations are tracked
-  // so that they can be given more precise dependencies. We track
-  // separately the known memory locations that may alias and those
-  // that are known not to alias
-  MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
-  MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
-  std::set<SUnit*> RejectMemNodes;
+  // We build scheduling units by walking a block's instruction list
+  // from bottom to top.
+
+  // Each MI's memory operands are analyzed into a list of underlying
+  // objects. The SU is then inserted into the SUList(s) mapped from the
+  // Value(s). Each Value thus gets mapped to lists of SUs depending
+  // on it, stores and loads kept separately. Two SUs are trivially
+  // non-aliasing if they both depend on only identified Values and do
+  // not share any common Value.
+  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
+
+  // Certain memory accesses are known to not alias any SU in Stores
+  // or Loads, and therefore have their own 'NonAlias' domain.
+  // E.g. spill/reload instructions never alias LLVM IR values. It
+  // would be nice to assume that this kind of memory access always
+  // has proper memory operand modelling, and is therefore never
+  // unanalyzable, but this is conservatively not done.
+  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
 
   // Remove any stale debug info; sometimes BuildSchedGraph is called again
   // without emitting the info from the previous call.
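// Editor's illustration of the two domains set up above: a reload from a
// spill slot is keyed by its PseudoSourceValue and, because fixed frame
// slots never alias LLVM IR values, it lands in NonAliasLoads, while a
// load of an IR object lands in Loads keyed by that Value:
//
//   SU(a): %vreg1 = LOAD <fi#3>   ; spill reload -> NonAliasLoads[PSV fi#3]
//   SU(b): STORE %vreg2, %obj     ; IR store     -> Stores[%obj]
//   ; a and b are trivially non-aliasing: no chain edge is considered.
//
// Only accesses with unknown underlying objects conservatively depend on
// SUs in both domains.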
@@ -882,283 +912,201 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MachineInstr *DbgMI = nullptr; for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { - MachineInstr *MI = std::prev(MII); - if (MI && DbgMI) { - DbgValues.push_back(std::make_pair(DbgMI, MI)); + MachineInstr &MI = *std::prev(MII); + if (DbgMI) { + DbgValues.push_back(std::make_pair(DbgMI, &MI)); DbgMI = nullptr; } - if (MI->isDebugValue()) { - DbgMI = MI; + if (MI.isDebugValue()) { + DbgMI = &MI; continue; } - SUnit *SU = MISUnitMap[MI]; + SUnit *SU = MISUnitMap[&MI]; assert(SU && "No SUnit mapped to this MI"); if (RPTracker) { collectVRegUses(SU); RegisterOperands RegOpers; - RegOpers.collect(*MI, *TRI, MRI); + RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false); + if (TrackLaneMasks) { + SlotIndex SlotIdx = LIS->getInstructionIndex(MI); + RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx); + } if (PDiffs != nullptr) PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI); RPTracker->recedeSkipDebugValues(); - assert(&*RPTracker->getPos() == MI && "RPTracker in sync"); + assert(&*RPTracker->getPos() == &MI && "RPTracker in sync"); RPTracker->recede(RegOpers); } assert( - (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && + (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) && "Cannot schedule terminators or labels!"); // Add register-based dependencies (data, anti, and output). + // For some instructions (calls, returns, inline-asm, etc.) there can + // be explicit uses and implicit defs, in which case the use will appear + // on the operand list before the def. Do two passes over the operand + // list to make sure that defs are processed before any uses. bool HasVRegDef = false; - for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { - const MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg()) continue; + for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) { + const MachineOperand &MO = MI.getOperand(j); + if (!MO.isReg() || !MO.isDef()) + continue; unsigned Reg = MO.getReg(); - if (Reg == 0) continue; + if (Reg == 0) + continue; if (TRI->isPhysicalRegister(Reg)) addPhysRegDeps(SU, j); else { - if (MO.isDef()) { - HasVRegDef = true; - addVRegDefDeps(SU, j); - } - else if (MO.readsReg()) // ignore undef operands - addVRegUseDeps(SU, j); + HasVRegDef = true; + addVRegDefDeps(SU, j); } } + // Now process all uses. + for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) { + const MachineOperand &MO = MI.getOperand(j); + // Only look at use operands. + // We do not need to check for MO.readsReg() here because subsequent + // subregister defs will get output dependence edges and need no + // additional use dependencies. + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + + if (TRI->isPhysicalRegister(Reg)) + addPhysRegDeps(SU, j); + else if (MO.readsReg()) // ignore undef operands + addVRegUseDeps(SU, j); + } + // If we haven't seen any uses in this scheduling region, create a // dependence edge to ExitSU to model the live-out latency. This is required // for vreg defs with no in-region use, and prefetches with no vreg def. // // FIXME: NumDataSuccs would be more precise than NumSuccs here. This // check currently relies on being called before adding chain deps. 
-    if (SU->NumSuccs == 0 && SU->Latency > 1
-        && (HasVRegDef || MI->mayLoad())) {
+    if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
       SDep Dep(SU, SDep::Artificial);
       Dep.setLatency(SU->Latency - 1);
       ExitSU.addPred(Dep);
     }
 
-    // Add chain dependencies.
-    // Chain dependencies used to enforce memory order should have
-    // latency of 0 (except for true dependency of Store followed by
-    // aliased Load... we estimate that with a single cycle of latency
-    // assuming the hardware will bypass)
-    // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable
-    // after stack slots are lowered to actual addresses.
-    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
-    // produce more precise dependence information.
-    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
-    if (isGlobalMemoryObject(AA, MI)) {
-      // Be conservative with these and add dependencies on all memory
-      // references, even those that are known to not alias.
-      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
-             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
-        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
-          I->second[i]->addPred(SDep(SU, SDep::Barrier));
-        }
-      }
-      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
-             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
-        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
-          SDep Dep(SU, SDep::Barrier);
-          Dep.setLatency(TrueMemOrderLatency);
-          I->second[i]->addPred(Dep);
-        }
-      }
-      // Add SU to the barrier chain.
+    // Add memory dependencies (Note: isStoreToStackSlot and
+    // isLoadFromStackSlot are not usable after stack slots are lowered to
+    // actual addresses).
+
+    // This is a barrier event that acts as a pivotal node in the DAG.
+    if (isGlobalMemoryObject(AA, &MI)) {
+
+      // Become the barrier chain.
       if (BarrierChain)
-        BarrierChain->addPred(SDep(SU, SDep::Barrier));
+        BarrierChain->addPredBarrier(SU);
       BarrierChain = SU;
 
-      // This is a barrier event that acts as a pivotal node in the DAG,
-      // so it is safe to clear list of exposed nodes.
-      adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
-                      TrueMemOrderLatency);
-      RejectMemNodes.clear();
-      NonAliasMemDefs.clear();
-      NonAliasMemUses.clear();
-
-      // fall-through
-    new_alias_chain:
-      // Chain all possibly aliasing memory references through SU.
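// Editor's walk-through of the new barrier handling added above (the
// removed code below threaded an explicit AliasChain instead). Given
//
//   I1: store to %a
//   I2: call @f          ; isGlobalMemoryObject
//   I3: load from %a
//
// the bottom-up walk first files SU(I3) under Loads[%a]. Visiting I2
// flushes every map through addBarrierChain(), making the call a
// predecessor of SU(I3), and SU(I2) becomes the BarrierChain. When I1 is
// visited it only needs one edge onto the BarrierChain; the store-to-load
// ordering I1 -> I3 is implied transitively through the call.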
- if (AliasChain) { - unsigned ChainLatency = 0; - if (AliasChain->getInstr()->mayLoad()) - ChainLatency = TrueMemOrderLatency; - addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, - RejectMemNodes, ChainLatency); - } - AliasChain = SU; - for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, - PendingLoads[k], RejectMemNodes, - TrueMemOrderLatency); - for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = - AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { - for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, - I->second[i], RejectMemNodes); - } - for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = - AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { - for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, - I->second[i], RejectMemNodes, TrueMemOrderLatency); - } - // This call must come after calls to addChainDependency() since it - // consumes the 'RejectMemNodes' list that addChainDependency() possibly - // adds to. - adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, - TrueMemOrderLatency); - PendingLoads.clear(); - AliasMemDefs.clear(); - AliasMemUses.clear(); - } else if (MI->mayStore()) { - // Add dependence on barrier chain, if needed. - // There is no point to check aliasing on barrier event. Even if - // SU and barrier _could_ be reordered, they should not. In addition, - // we have lost all RejectMemNodes below barrier. - if (BarrierChain) - BarrierChain->addPred(SDep(SU, SDep::Barrier)); - UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); + DEBUG(dbgs() << "Global memory object and new barrier chain: SU(" + << BarrierChain->NodeNum << ").\n";); - if (Objs.empty()) { - // Treat all other stores conservatively. - goto new_alias_chain; - } + // Add dependencies against everything below it and clear maps. + addBarrierChain(Stores); + addBarrierChain(Loads); + addBarrierChain(NonAliasStores); + addBarrierChain(NonAliasLoads); - bool MayAlias = false; - for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); - K != KE; ++K) { - ValueType V = K->getPointer(); - bool ThisMayAlias = K->getInt(); - if (ThisMayAlias) - MayAlias = true; - - // A store to a specific PseudoSourceValue. Add precise dependencies. - // Record the def in MemDefs, first adding a dep if there is - // an existing def. - MapVector<ValueType, std::vector<SUnit *> >::iterator I = - ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector<ValueType, std::vector<SUnit *> >::iterator IE = - ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); - if (I != IE) { - for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, - I->second[i], RejectMemNodes, 0, true); - - // If we're not using AA, then we only need one store per object. - if (!AAForDep) - I->second.clear(); - I->second.push_back(SU); - } else { - if (ThisMayAlias) { - if (!AAForDep) - AliasMemDefs[V].clear(); - AliasMemDefs[V].push_back(SU); - } else { - if (!AAForDep) - NonAliasMemDefs[V].clear(); - NonAliasMemDefs[V].push_back(SU); - } + continue; + } + + // If it's not a store or a variant load, we're done. 
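// Editor's examples of how the gate below classifies instructions (a
// "variant" load is one whose result stores in the region could change):
//
//   constant-pool load : mayLoad() && isInvariantLoad(AA)  -> skipped,
//                        no chain dependencies at all
//   stack reload       : mayLoad() && !isInvariantLoad(AA) -> tracked
//   any store          : mayStore()                        -> tracked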
+    if (!MI.mayStore() && !(MI.mayLoad() && !MI.isInvariantLoad(AA)))
+      continue;
+
+    // Always add a dependency edge to BarrierChain if present.
+    if (BarrierChain)
+      BarrierChain->addPredBarrier(SU);
+
+    // Find the underlying objects for MI. The Objs vector is either
+    // empty, or filled with the Values of memory locations which this
+    // SU depends on. An empty vector means the memory location is
+    // unknown, and may alias anything.
+    UnderlyingObjectsVector Objs;
+    getUnderlyingObjectsForInstr(&MI, MFI, Objs, MF.getDataLayout());
+
+    if (MI.mayStore()) {
+      if (Objs.empty()) {
+        // An unknown store depends on all stores and loads.
+        addChainDependencies(SU, Stores);
+        addChainDependencies(SU, NonAliasStores);
+        addChainDependencies(SU, Loads);
+        addChainDependencies(SU, NonAliasLoads);
+
+        // Map this store to 'UnknownValue'.
+        Stores.insert(SU, UnknownValue);
+      } else {
+        // Add precise dependencies against all previously seen memory
+        // accesses mapped to the same Value(s).
+        for (const UnderlyingObject &UnderlObj : Objs) {
+          ValueType V = UnderlObj.getValue();
+          bool ThisMayAlias = UnderlObj.mayAlias();
+
+          // Add dependencies to previous stores and loads mapped to V.
+          addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
+          addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
         }
-        // Handle the uses in MemUses, if there are any.
-        MapVector<ValueType, std::vector<SUnit *> >::iterator J =
-          ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
-        MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
-          ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
-        if (J != JE) {
-          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
-            addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
-                               J->second[i], RejectMemNodes,
-                               TrueMemOrderLatency, true);
-          J->second.clear();
+        // Update the store map after all chains have been added to avoid
+        // adding a self-loop edge if multiple underlying objects are present.
+        for (const UnderlyingObject &UnderlObj : Objs) {
+          ValueType V = UnderlObj.getValue();
+          bool ThisMayAlias = UnderlObj.mayAlias();
+
+          // Map this store to V.
+          (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
         }
+        // The store may have dependencies to unanalyzable loads and
+        // stores.
+        addChainDependencies(SU, Loads, UnknownValue);
+        addChainDependencies(SU, Stores, UnknownValue);
       }
-      if (MayAlias) {
-        // Add dependencies from all the PendingLoads, i.e. loads
-        // with no underlying object.
-        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
-          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
-                             PendingLoads[k], RejectMemNodes,
-                             TrueMemOrderLatency);
-        // Add dependence on alias chain, if needed.
-        if (AliasChain)
-          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
-                             RejectMemNodes);
-      }
-      // This call must come after calls to addChainDependency() since it
-      // consumes the 'RejectMemNodes' list that addChainDependency() possibly
-      // adds to.
-      adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
-                      TrueMemOrderLatency);
-    } else if (MI->mayLoad()) {
-      bool MayAlias = true;
-      if (MI->isInvariantLoad(AA)) {
-        // Invariant load, no chain dependencies needed!
+    } else { // SU is a load.
+      if (Objs.empty()) {
+        // An unknown load depends on all stores.
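// Editor's note: Objs comes out empty when getUnderlyingObjectsForInstr()
// cannot track every memory operand to an identified object (volatile or
// missing memory operands included). Such an SU is chained against every
// recorded store and is itself filed under UnknownValue, so stores seen
// later in the walk reach it via addChainDependencies(SU, Loads,
// UnknownValue).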
+ addChainDependencies(SU, Stores); + addChainDependencies(SU, NonAliasStores); + + Loads.insert(SU, UnknownValue); } else { - UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); - - if (Objs.empty()) { - // A load with no underlying object. Depend on all - // potentially aliasing stores. - for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = - AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, - I->second[i], RejectMemNodes); - - PendingLoads.push_back(SU); - MayAlias = true; - } else { - MayAlias = false; - } + for (const UnderlyingObject &UnderlObj : Objs) { + ValueType V = UnderlObj.getValue(); + bool ThisMayAlias = UnderlObj.mayAlias(); + + // Add precise dependencies against all previously seen stores + // mapping to the same Value(s). + addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V); - for (UnderlyingObjectsVector::iterator - J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { - ValueType V = J->getPointer(); - bool ThisMayAlias = J->getInt(); - - if (ThisMayAlias) - MayAlias = true; - - // A load from a specific PseudoSourceValue. Add precise dependencies. - MapVector<ValueType, std::vector<SUnit *> >::iterator I = - ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector<ValueType, std::vector<SUnit *> >::iterator IE = - ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); - if (I != IE) - for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, - I->second[i], RejectMemNodes, 0, true); - if (ThisMayAlias) - AliasMemUses[V].push_back(SU); - else - NonAliasMemUses[V].push_back(SU); + // Map this load to V. + (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V); } - // Add dependencies on alias and barrier chains, if needed. - if (MayAlias && AliasChain) - addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, - RejectMemNodes); - if (MayAlias) - // This call must come after calls to addChainDependency() since it - // consumes the 'RejectMemNodes' list that addChainDependency() - // possibly adds to. - adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, - RejectMemNodes, /*Latency=*/0); - if (BarrierChain) - BarrierChain->addPred(SDep(SU, SDep::Barrier)); + // The load may have dependencies to unanalyzable stores. + addChainDependencies(SU, Stores, UnknownValue); } } + + // Reduce maps if they grow huge. 
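// Worked numbers (editor's note): with the defaults, HugeRegion == 1000
// and getReductionSize() == HugeRegion / 2 == 500. Once a domain's two
// maps jointly hold 1000 SUs, the 500 SUs that entered the maps first
// (the bottom-most ones, i.e. the highest NodeNums) are collapsed behind
// a new BarrierChain node by reduceHugeMemNodeMaps() further down.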
+ if (Stores.size() + Loads.size() >= HugeRegion) { + DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";); + reduceHugeMemNodeMaps(Stores, Loads, getReductionSize()); + } + if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) { + DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";); + reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize()); + } } + if (DbgMI) FirstDbgValue = DbgMI; @@ -1166,7 +1114,84 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Uses.clear(); CurrentVRegDefs.clear(); CurrentVRegUses.clear(); - PendingLoads.clear(); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) { + PSV->printCustom(OS); + return OS; +} + +void ScheduleDAGInstrs::Value2SUsMap::dump() { + for (auto &Itr : *this) { + if (Itr.first.is<const Value*>()) { + const Value *V = Itr.first.get<const Value*>(); + if (isa<UndefValue>(V)) + dbgs() << "Unknown"; + else + V->printAsOperand(dbgs()); + } + else if (Itr.first.is<const PseudoSourceValue*>()) + dbgs() << Itr.first.get<const PseudoSourceValue*>(); + else + llvm_unreachable("Unknown Value type."); + + dbgs() << " : "; + dumpSUList(Itr.second); + } +} + +/// Reduce maps in FIFO order, by N SUs. This is better than turning +/// every Nth memory SU into BarrierChain in buildSchedGraph(), since +/// it avoids unnecessary edges between seen SUs above the new +/// BarrierChain, and those below it. +void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, + Value2SUsMap &loads, unsigned N) { + DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; + stores.dump(); + dbgs() << "Loading SUnits:\n"; + loads.dump()); + + // Insert all SU's NodeNums into a vector and sort it. + std::vector<unsigned> NodeNums; + NodeNums.reserve(stores.size() + loads.size()); + for (auto &I : stores) + for (auto *SU : I.second) + NodeNums.push_back(SU->NodeNum); + for (auto &I : loads) + for (auto *SU : I.second) + NodeNums.push_back(SU->NodeNum); + std::sort(NodeNums.begin(), NodeNums.end()); + + // The N last elements in NodeNums will be removed, and the SU with + // the lowest NodeNum of them will become the new BarrierChain to + // let the not yet seen SUs have a dependency to the removed SUs. + assert (N <= NodeNums.size()); + SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)]; + if (BarrierChain) { + // The aliasing and non-aliasing maps reduce independently of each + // other, but share a common BarrierChain. Check if the + // newBarrierChain is above the former one. If it is not, it may + // introduce a loop to use newBarrierChain, so keep the old one. + if (newBarrierChain->NodeNum < BarrierChain->NodeNum) { + BarrierChain->addPredBarrier(newBarrierChain); + BarrierChain = newBarrierChain; + DEBUG(dbgs() << "Inserting new barrier chain: SU(" + << BarrierChain->NodeNum << ").\n";); + } + else + DEBUG(dbgs() << "Keeping old barrier chain: SU(" + << BarrierChain->NodeNum << ").\n";); + } + else + BarrierChain = newBarrierChain; + + insertBarrierChain(stores); + insertBarrierChain(loads); + + DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; + stores.dump(); + dbgs() << "Loading SUnits:\n"; + loads.dump()); } /// \brief Initialize register live-range state for updating kills. @@ -1190,7 +1215,8 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { /// operands, then we also need to propagate that to any instructions inside /// the bundle which had the same kill state. 
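// Editor's example of the propagation requirement described above: in
//
//   BUNDLE implicit-def %r0
//     I0: %r0 = ...
//     I1: ...  = use %r0       <- the bottom-most reader
//
// setting the kill flag for %r0 on the bundle header must also mark I1,
// and clearing it must clear every matching operand inside the bundle;
// the rewrite below delegates that per-instruction work to
// MachineInstr::addRegisterKilled() and clearRegisterKills().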
static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, - bool NewKillState) { + bool NewKillState, + const TargetRegisterInfo *TRI) { if (MI->getOpcode() != TargetOpcode::BUNDLE) return; @@ -1199,30 +1225,13 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, // might set it on too many operands. We will clear as many flags as we // can though. MachineBasicBlock::instr_iterator Begin = MI->getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(MI); + MachineBasicBlock::instr_iterator End = getBundleEnd(*MI); while (Begin != End) { - for (MachineOperand &MO : (--End)->operands()) { - if (!MO.isReg() || MO.isDef() || Reg != MO.getReg()) - continue; - - // DEBUG_VALUE nodes do not contribute to code generation and should - // always be ignored. Failure to do so may result in trying to modify - // KILL flags on DEBUG_VALUE nodes, which is distressing. - if (MO.isDebug()) - continue; - - // If the register has the internal flag then it could be killing an - // internal def of the register. In this case, just skip. We only want - // to toggle the flag on operands visible outside the bundle. - if (MO.isInternalRead()) - continue; - - if (MO.isKill() == NewKillState) - continue; - MO.setIsKill(NewKillState); - if (NewKillState) - return; - } + if (NewKillState) { + if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) + return; + } else + (--End)->clearRegisterKills(Reg, TRI); } } @@ -1230,21 +1239,21 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { // Setting kill flag... if (!MO.isKill()) { MO.setIsKill(true); - toggleBundleKillFlag(MI, MO.getReg(), true); + toggleBundleKillFlag(MI, MO.getReg(), true, TRI); return false; } // If MO itself is live, clear the kill flag... if (LiveRegs.test(MO.getReg())) { MO.setIsKill(false); - toggleBundleKillFlag(MI, MO.getReg(), false); + toggleBundleKillFlag(MI, MO.getReg(), false, TRI); return false; } // If any subreg of MO is live, then create an imp-def for that // subreg and keep MO marked as killed. MO.setIsKill(false); - toggleBundleKillFlag(MI, MO.getReg(), false); + toggleBundleKillFlag(MI, MO.getReg(), false, TRI); bool AllDead = true; const unsigned SuperReg = MO.getReg(); MachineInstrBuilder MIB(MF, MI); @@ -1257,7 +1266,7 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { if(AllDead) { MO.setIsKill(true); - toggleBundleKillFlag(MI, MO.getReg(), true); + toggleBundleKillFlag(MI, MO.getReg(), true, TRI); } return false; } @@ -1275,15 +1284,15 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { unsigned Count = MBB->size(); for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); I != E; --Count) { - MachineInstr *MI = --I; - if (MI->isDebugValue()) + MachineInstr &MI = *--I; + if (MI.isDebugValue()) continue; // Update liveness. Registers that are defed but not used in this // instruction are now dead. Mark register and all subregs as they // are completely defined. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (MO.isRegMask()) LiveRegs.clearBitsNotInMask(MO.getRegMask()); if (!MO.isReg()) continue; @@ -1291,7 +1300,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { if (Reg == 0) continue; if (!MO.isDef()) continue; // Ignore two-addr defs. 
- if (MI->isRegTiedToUseOperand(i)) continue; + if (MI.isRegTiedToUseOperand(i)) continue; // Repeat for reg and all subregs. for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); @@ -1303,8 +1312,8 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { // register is used multiple times we only set the kill flag on // the first use. Don't set kill flags on undef operands. killedRegs.reset(); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if ((Reg == 0) || MRI.isReserved(Reg)) continue; @@ -1329,13 +1338,15 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { if (MO.isKill() != kill) { DEBUG(dbgs() << "Fixing " << MO << " in "); // Warning: toggleKillFlag may invalidate MO. - toggleKillFlag(MI, MO); - DEBUG(MI->dump()); - DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) { - MachineBasicBlock::instr_iterator Begin = MI->getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(MI); - while (++Begin != End) - DEBUG(Begin->dump()); + toggleKillFlag(&MI, MO); + DEBUG(MI.dump()); + DEBUG({ + if (MI.getOpcode() == TargetOpcode::BUNDLE) { + MachineBasicBlock::instr_iterator Begin = MI.getIterator(); + MachineBasicBlock::instr_iterator End = getBundleEnd(MI); + while (++Begin != End) + DEBUG(Begin->dump()); + } }); } @@ -1344,8 +1355,8 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { // Mark any used register (that is not using undef) and subregs as // now live... - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if ((Reg == 0) || MRI.isReserved(Reg)) continue; diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp index 1150d26..ca2881c 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 38833a4..69c4870 100644 --- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -23,22 +23,13 @@ using namespace llvm; -#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType +#define DEBUG_TYPE DebugType -#ifndef NDEBUG -const char *ScoreboardHazardRecognizer::DebugType = ""; -#endif - -ScoreboardHazardRecognizer:: -ScoreboardHazardRecognizer(const InstrItineraryData *II, - const ScheduleDAG *SchedDAG, - const char *ParentDebugType) : - ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0), - IssueCount(0) { - -#ifndef NDEBUG - DebugType = ParentDebugType; -#endif +ScoreboardHazardRecognizer::ScoreboardHazardRecognizer( + const InstrItineraryData *II, const ScheduleDAG *SchedDAG, + const char *ParentDebugType) + : 
ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II), + DAG(SchedDAG), IssueWidth(0), IssueCount(0) { // Determine the maximum depth of any itinerary. This determines the depth of // the scoreboard. We always make the scoreboard at least 1 cycle deep to @@ -91,7 +82,7 @@ void ScoreboardHazardRecognizer::Reset() { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void ScoreboardHazardRecognizer::Scoreboard::dump() const { +LLVM_DUMP_METHOD void ScoreboardHazardRecognizer::Scoreboard::dump() const { dbgs() << "Scoreboard:\n"; unsigned last = Depth - 1; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c741982..5ecc6da 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -112,7 +113,7 @@ namespace { /// /// This is used to allow us to reliably add any operands of a DAG node /// which have not yet been combined to the worklist. - SmallPtrSet<SDNode *, 64> CombinedNodes; + SmallPtrSet<SDNode *, 32> CombinedNodes; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; @@ -211,8 +212,8 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); - void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, - SDValue Trunc, SDValue ExtLoad, SDLoc DL, + void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc, + SDValue ExtLoad, const SDLoc &DL, ISD::NodeType ExtType); /// Call the node-specific routine that knows how to fold each @@ -258,6 +259,7 @@ namespace { SDValue visitSRL(SDNode *N); SDValue visitRotate(SDNode *N); SDValue visitBSWAP(SDNode *N); + SDValue visitBITREVERSE(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); @@ -273,6 +275,7 @@ namespace { SDValue visitANY_EXTEND(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); + SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); @@ -326,18 +329,19 @@ namespace { SDValue visitFMULForFMACombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); - SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); + SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS, + SDValue RHS); SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); - SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2); - SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, - SDValue N3, ISD::CondCode CC, + SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - SDLoc DL, bool foldBooleans = true); + const SDLoc &DL, bool foldBooleans = true); bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, SDValue &CC) const; @@ -353,19 
+357,21 @@ namespace { SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags); - SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags); + SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip); + SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags, bool Reciprocal); + SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags, bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, - SDLoc DL); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); + const SDLoc &DL); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); @@ -386,10 +392,17 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); - /// Do FindBetterChain for a store and any possibly adjacent stores on - /// consecutive chains. + /// Try to replace a store and any possibly adjacent stores on + /// consecutive chains with better chains. Return true only if St is + /// replaced. + /// + /// Notice that other chains may still be replaced even if the function + /// returns false. bool findBetterNeighborChains(StoreSDNode *St); + /// Match "(X shl/srl V1) & V2" where V2 may not be present. + bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { @@ -414,8 +427,7 @@ namespace { /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a /// constant build_vector of the stored constant values in Stores. - SDValue getMergedConstantVectorStore(SelectionDAG &DAG, - SDLoc SL, + SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores, SmallVectorImpl<SDValue> &Chains, EVT Ty) const; @@ -444,6 +456,12 @@ namespace { StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); + /// Helper function for MergeConsecutiveStores. Checks if + /// Candidate stores have indirect dependency through their + /// operands. \return True if safe to merge + bool checkMergeStoreCandidatesForDependencies( + SmallVectorImpl<MemOpLink> &StoreNodes); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -747,32 +765,6 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const { return false; } -/// Returns true if N is a BUILD_VECTOR node whose -/// elements are all the same constant or undefined. 
-static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { - BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); - if (!C) - return false; - - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - EVT EltVT = N->getValueType(0).getVectorElementType(); - return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, - HasAnyUndefs) && - EltVT.getSizeInBits() >= SplatBitSize); -} - -// \brief Returns the SDNode if it is a constant integer BuildVector -// or constant integer. -static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { - if (isa<ConstantSDNode>(N)) - return N.getNode(); - if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) - return N.getNode(); - return nullptr; -} - // \brief Returns the SDNode if it is a constant float BuildVector // or constant float. static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { @@ -821,12 +813,12 @@ static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { return nullptr; } -SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, - SDValue N0, SDValue N1) { +SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, + SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc) { - if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { + if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); @@ -845,17 +837,17 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, } if (N1.getOpcode() == Opc) { - if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { - if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { + if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); return SDValue(); } if (N1.hasOneUse()) { - // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one + // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one // use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0)); if (!OpNode.getNode()) return SDValue(); AddToWorklist(OpNode.getNode()); @@ -962,7 +954,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; SDLoc dl(Op); - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + if (ISD::isUNINDEXEDLoad(Op.getNode())) { + LoadSDNode *LD = cast<LoadSDNode>(Op); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD @@ -1166,6 +1159,9 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { if (!LegalOperations) return false; + if (!ISD::isUNINDEXEDLoad(Op.getNode())) + return false; + EVT VT = Op.getValueType(); if (VT.isVector() || !VT.isInteger()) return false; @@ -1259,8 +1255,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // changes of the root. 
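The ReassociateOps hunk above rewrites (op (op x, c1), c2) into (op x, (op c1, c2)) so the constant half can be constant-folded, and flips the one-use case to (op (op x, y), c1). A minimal standalone sketch of both identities, in plain C++ on wrapping unsigned arithmetic rather than LLVM API (every name below is invented for illustration):

// Standalone check of the reassociation identities; not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xDEADBEEF, y = 12345, c1 = 17, c2 = 42;
  // (op (op x, c1), c2) -> (op x, (op c1, c2)) for an associative op like ADD:
  assert(((x + c1) + c2) == (x + (c1 + c2)));
  // The one-use rewrite in the patch: (op x, (op y, c1)) -> (op (op x, y), c1)
  assert((x + (y + c1)) == ((x + y) + c1));
  return 0;
}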
HandleSDNode Dummy(DAG.getRoot()); - // while the worklist isn't empty, find a node and - // try and combine it. + // While the worklist isn't empty, find a node and try to combine it. while (!WorklistMap.empty()) { SDNode *N; // The Worklist holds the SDNodes in order, but it may contain null entries. @@ -1326,8 +1321,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) { DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); - // Transfer debug value. - DAG.TransferDbgValues(SDValue(N, 0), RV); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { @@ -1388,6 +1381,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ROTR: case ISD::ROTL: return visitRotate(N); case ISD::BSWAP: return visitBSWAP(N); + case ISD::BITREVERSE: return visitBITREVERSE(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); @@ -1403,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); + case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); @@ -1628,8 +1623,8 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } -/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a -/// ContantSDNode pointer else nullptr. +/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a +/// ConstantSDNode pointer else nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); return Const != nullptr && !Const->isOpaque() ? 
Const : nullptr; @@ -1653,38 +1648,32 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // fold (add x, undef) -> undef - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return N0; - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; - // fold (add c1, c2) -> c1+c2 - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); - // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { + // canonicalize constant to RHS + if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + // fold (add c1, c2) -> c1+c2 + return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, + N0.getNode(), N1.getNode()); + } // fold (add x, 0) -> x if (isNullConstant(N1)) return N0; - // fold (add Sym, c) -> Sym+c - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) - if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && - GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, - GA->getOffset() + - (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A - if (N1C && N0.getOpcode() == ISD::SUB) - if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { - SDLoc DL(N); - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(N1C->getAPIntValue()+ - N0C->getAPIntValue(), DL, VT), - N0.getOperand(1)); - } + if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) { + if (N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(N1C->getAPIntValue()+ + N0C->getAPIntValue(), DL, VT), + N0.getOperand(1)); + } + } // reassociate add if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) return RADD; @@ -1850,9 +1839,9 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. -static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, - bool LegalOperations, bool LegalTypes) { +static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, bool LegalOperations, + bool LegalTypes) { if (!VT.isVector()) return DAG.getConstant(0, DL, VT); if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) @@ -1879,11 +1868,14 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // FIXME: Refactor this and xor and other similar operations together. 
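The visitADD hunk above keeps the ((c1-A)+c2) -> (c1+c2)-A rewrite but now reaches it only through the single getAsNonOpaqueConstant(N1) check, after constants have been canonicalized to the RHS. A standalone check of the arithmetic identity (plain C++; modular unsigned arithmetic makes it hold for every input):

// Standalone check; not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x12345678, c1 = 1000, c2 = 0xFFFF0000;
  // fold ((c1 - A) + c2) -> (c1 + c2) - A; wraparound keeps it exact.
  assert(((c1 - A) + c2) == ((c1 + c2) - A));
  return 0;
}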
if (N0 == N1) return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); - // fold (sub c1, c2) -> c1-c2 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + // fold (sub c1, c2) -> c1-c2 + return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, + N0.getNode(), N1.getNode()); + } ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) if (N1C) { SDLoc DL(N); @@ -1933,9 +1925,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { N0.getOperand(0), N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return N0; - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; // If the relocation model supports it, consider symbol offsets. @@ -2013,7 +2005,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { EVT VT = N0.getValueType(); // fold (mul x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); bool N0IsConst = false; @@ -2026,8 +2018,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); - N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); + N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); + N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); } else { N0IsConst = isa<ConstantSDNode>(N0); if (N0IsConst) { @@ -2047,8 +2039,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0.getNode(), N1.getNode()); // canonicalize constant to RHS (vector doesn't have to splat) - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1 == 0) @@ -2091,23 +2083,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) if (N1IsConst && N0.getOpcode() == ISD::SHL && - (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1)))) { - SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, - N1, N0.getOperand(1)); + (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) { + SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); AddToWorklist(C3.getNode()); - return DAG.getNode(ISD::MUL, SDLoc(N), VT, - N0.getOperand(0), C3); + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { - SDValue Sh(nullptr,0), Y(nullptr,0); + SDValue Sh(nullptr, 0), Y(nullptr, 0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
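For the (mul (shl X, c1), c2) -> (mul X, c2 << c1) fold reformatted above, the point is that a constant shift can be pre-applied to the constant multiplier at compile time. A standalone sketch of the identity (plain C++, not LLVM code):

// Standalone check; not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xABCDEF01, c2 = 37;
  unsigned c1 = 5;
  // (mul (shl X, c1), c2) == (mul X, (shl c2, c1)) modulo 2^32.
  assert(((X << c1) * c2) == (X * (c2 << c1)));
  return 0;
}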
if (N0.getOpcode() == ISD::SHL && - (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1))) && + (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1))) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && @@ -2117,17 +2107,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } if (Sh.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - Sh.getOperand(0), Y); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, - Mul, Sh.getOperand(1)); + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); } } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (isConstantIntBuildVectorOrConstantInt(N1) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N1) && N0.getOpcode() == ISD::ADD && - isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && isMulAddWithConstProfitable(N, N0, N1)) return DAG.getNode(ISD::ADD, SDLoc(N), VT, DAG.getNode(ISD::MUL, SDLoc(N0), VT, @@ -2146,7 +2134,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; - switch (Node->getSimpleValueType(0).SimpleTy) { + EVT NodeType = Node->getValueType(0); + if (!NodeType.isSimple()) + return false; + switch (NodeType.getSimpleVT().SimpleTy) { default: return false; // No libcall for vector types. case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; @@ -2163,14 +2154,18 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { if (Node->use_empty()) return SDValue(); // This is a dead node, leave it alone. + unsigned Opcode = Node->getOpcode(); + bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + + // DivMod lib calls can still work on non-legal types if using lib-calls. EVT VT = Node->getValueType(0); - if (!TLI.isTypeLegal(VT)) + if (VT.isVector() || !VT.isInteger()) return SDValue(); - unsigned Opcode = Node->getOpcode(); - bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT)) + return SDValue(); - unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; // If DIVREM is going to get expanded into a libcall, // but there is no libcall available, then don't combine. 
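useDivRem, as changed above, now also fires for non-legal integer types when the target customizes [SU]DIVREM, because the combined operation (or its libcall expansion) produces quotient and remainder together. A standalone illustration of why pairing a matching div and rem is safe, with std::div standing in for the combined node (illustrative only):

// Standalone check; not part of the patch.
#include <cassert>
#include <cstdlib>

int main() {
  int x = 12345, y = 67;
  // One combined operation yields both results; the remainder is always
  // recoverable as x - (x / y) * y, which is what sharing buys us.
  std::div_t qr = std::div(x, y);
  assert(qr.quot == x / y && qr.rem == x % y);
  assert(qr.rem == x - (x / y) * y);
  return 0;
}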
if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && @@ -2314,10 +2309,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return DivRem; // undef / X -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X / undef -> undef - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; return SDValue(); @@ -2378,10 +2373,10 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return DivRem; // undef / X -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X / undef -> undef - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; return SDValue(); @@ -2419,15 +2414,13 @@ SDValue DAGCombiner::visitREM(SDNode *N) { } // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { - if (SHC->getAPIntValue().isPowerOf2()) { - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, - VT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0, Add); - } + ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0)); + if (SHC && SHC->getAPIntValue().isPowerOf2()) { + APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits()); + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); } } } @@ -2462,10 +2455,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) { return DivRem.getValue(1); // undef % X -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, DL, VT); // X % undef -> undef - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; return SDValue(); @@ -2489,7 +2482,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { getShiftAmountTy(N0.getValueType()))); } // fold (mulhs x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply @@ -2525,7 +2518,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (isOneConstant(N1)) return DAG.getConstant(0, DL, N0.getValueType()); // fold (mulhu x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhu to a wider multiply @@ -2698,8 +2691,8 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); @@ -2761,7 +2754,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { } // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) - // Only perform this optimization after type legalization and before + // Only perform this optimization up until type legalization, before // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and // we don't want to undo this promotion. 
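The Level <= AfterLegalizeTypes relaxation above still rests on the same bit-level fact: bitwise logic commutes with bitcasts, since both only reinterpret or combine bits in place. A standalone sketch with memcpy playing the role of the bitcast (plain C++, no LLVM types):

// Standalone check; not part of the patch. XOR on v2i32 lanes equals XOR
// on the same 64 bits viewed as one i64.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t a[2] = {0x11223344, 0x55667788};
  uint32_t b[2] = {0xCAFEBABE, 0xDEADBEEF};
  uint32_t lanewise[2] = {a[0] ^ b[0], a[1] ^ b[1]};
  uint64_t wa, wb;
  std::memcpy(&wa, a, 8);  // "bitcast" v2i32 -> i64
  std::memcpy(&wb, b, 8);
  uint64_t whole = wa ^ wb;
  assert(std::memcmp(&whole, lanewise, 8) == 0);
  return 0;
}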
@@ -2769,7 +2762,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // on scalars. if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && - Level == AfterLegalizeTypes) { + Level <= AfterLegalizeTypes) { SDValue In0 = N0.getOperand(0); SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); @@ -2814,7 +2807,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // Don't try to fold this node if it requires introducing a // build vector of all zeros that might be illegal at this stage. - if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { if (!LegalTypes) ShOp = DAG.getConstant(0, SDLoc(N), VT); else @@ -2829,13 +2822,13 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { N0->getOperand(0), N1->getOperand(0)); AddToWorklist(NewNode.getNode()); return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, - &SVN0->getMask()[0]); + SVN0->getMask()); } // Don't try to fold this node if it requires introducing a // build vector of all zeros that might be illegal at this stage. ShOp = N0->getOperand(0); - if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { if (!LegalTypes) ShOp = DAG.getConstant(0, SDLoc(N), VT); else @@ -2850,7 +2843,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { N0->getOperand(1), N1->getOperand(1)); AddToWorklist(NewNode.getNode()); return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, - &SVN0->getMask()[0]); + SVN0->getMask()); } } } @@ -2867,7 +2860,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, EVT VT = N1.getValueType(); // fold (and x, undef) -> 0 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(LocReference), VT); // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) SDValue LL, LR, RL, RR, CC0, CC1; @@ -2965,6 +2958,50 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, } } + // Reduce bit extract of low half of an integer to the narrower type. + // (and (srl i64:x, K), KMask) -> + // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask) + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) { + if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + unsigned Size = VT.getSizeInBits(); + const APInt &AndMask = CAnd->getAPIntValue(); + unsigned ShiftBits = CShift->getZExtValue(); + unsigned MaskBits = AndMask.countTrailingOnes(); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); + + if (APIntOps::isMask(AndMask) && + // Required bits must not span the two halves of the integer and + // must fit in the half size type. + (ShiftBits + MaskBits <= Size / 2) && + TLI.isNarrowingProfitable(VT, HalfVT) && + TLI.isTypeDesirableForOp(ISD::AND, HalfVT) && + TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) && + TLI.isTruncateFree(VT, HalfVT) && + TLI.isZExtFree(HalfVT, VT)) { + // The isNarrowingProfitable is to avoid regressions on PPC and + // AArch64 which match a few 64-bit bit insert / bit extract patterns + // on downstream users of this. Those patterns could probably be + // extended to handle extensions mixed in. + + SDValue SL(N0); + assert(ShiftBits != 0 && MaskBits <= Size); + + // Extracting the highest bit of the low half. 
+ EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT, + N0.getOperand(0)); + + SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT); + SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT); + SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK); + SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask); + return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And); + } + } + } + } + return SDValue(); } @@ -3045,8 +3082,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (isAllOnesConstant(N1)) @@ -3090,8 +3127,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // the 'X' node here can either be nothing or an extract_vector_elt to catch // more cases. if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - N0.getOperand(0).getOpcode() == ISD::LOAD) || - N0.getOpcode() == ISD::LOAD) { + N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() && + N0.getOperand(0).getOpcode() == ISD::LOAD && + N0.getOperand(0).getResNo() == 0) || + (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) { LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0) ); @@ -3234,12 +3273,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(NewPtr.getNode()); - SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, - LN0->getChain(), NewPtr, - LN0->getPointerInfo(), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Alignment, LN0->getAAInfo()); + SDValue Load = DAG.getExtLoad( + ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, + LN0->getPointerInfo(), ExtVT, Alignment, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); AddToWorklist(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3303,9 +3340,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { - SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), - N0.getOperand(1), false); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false)) return BSwap; } @@ -3576,7 +3612,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N1.getValueType(); // fold (or x, undef) -> -1 if (!LegalOperations && - (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + (N0.isUndef() || N1.isUndef())) { EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), SDLoc(LocReference), VT); @@ -3697,59 +3733,70 @@ SDValue DAGCombiner::visitOR(SDNode *N) { N1.getValueType().getScalarType().getSizeInBits()), SDLoc(N), N1.getValueType()); - // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) - // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) // Do this only if the resulting shuffle is legal. if (isa<ShuffleVectorSDNode>(N0) && isa<ShuffleVectorSDNode>(N1) && // Avoid folding a node with illegal type. - TLI.isTypeLegal(VT) && - N0->getOperand(1) == N1->getOperand(1) && - ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { - bool CanFold = true; - unsigned NumElts = VT.getVectorNumElements(); - const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); - const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); - // We construct two shuffle masks: - // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand - // and N1 as the second operand. - // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand - // and N0 as the second operand. - // We do this because OR is commutable and therefore there might be - // two ways to fold this node into a shuffle. - SmallVector<int,4> Mask1; - SmallVector<int,4> Mask2; - - for (unsigned i = 0; i != NumElts && CanFold; ++i) { - int M0 = SV0->getMaskElt(i); - int M1 = SV1->getMaskElt(i); - - // Both shuffle indexes are undef. Propagate Undef. - if (M0 < 0 && M1 < 0) { - Mask1.push_back(M0); - Mask2.push_back(M0); - continue; - } + TLI.isTypeLegal(VT)) { + bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); + bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); + bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode()); + // Ensure both shuffles have a zero input. + if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) { + assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); + assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); + const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); + const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); + bool CanFold = true; + int NumElts = VT.getVectorNumElements(); + SmallVector<int, 4> Mask(NumElts); + + for (int i = 0; i != NumElts; ++i) { + int M0 = SV0->getMaskElt(i); + int M1 = SV1->getMaskElt(i); + + // Determine if either index is pointing to a zero vector. + bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts)); + bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts)); + + // If one element is zero and the other side is undef, keep undef. + // This also handles the case that both are undef. + if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) { + Mask[i] = -1; + continue; + } - if (M0 < 0 || M1 < 0 || - (M0 < (int)NumElts && M1 < (int)NumElts) || - (M0 >= (int)NumElts && M1 >= (int)NumElts)) { - CanFold = false; - break; + // Make sure only one of the elements is zero. + if (M0Zero == M1Zero) { + CanFold = false; + break; + } + + assert((M0 >= 0 || M1 >= 0) && "Undef index!"); + + // We have a zero and non-zero element. If the non-zero came from + // SV0 make the index a LHS index. If it came from SV1, make it + // a RHS index. We need to mod by NumElts because we don't care + // which operand it came from in the original shuffles. + Mask[i] = M1Zero ?
M0 % NumElts : (M1 % NumElts) + NumElts; } - Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); - Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); - } + if (CanFold) { + SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); + SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); + + bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); + if (!LegalMask) { + std::swap(NewLHS, NewRHS); + ShuffleVectorSDNode::commuteMask(Mask); + LegalMask = TLI.isShuffleMaskLegal(Mask, VT); + } - if (CanFold) { - // Fold this sequence only if the resulting shuffle is 'legal'. - if (TLI.isShuffleMaskLegal(Mask1, VT)) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), - N1->getOperand(0), &Mask1[0]); - if (TLI.isShuffleMaskLegal(Mask2, VT)) - return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), - N0->getOperand(0), &Mask2[0]); + if (LegalMask) + return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask); + } } } } @@ -3760,8 +3807,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (isNullConstant(N1)) @@ -3817,9 +3864,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } /// Match "(X shl/srl V1) & V2" where V2 may not be present. -static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { +bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { - if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); Op = Op.getOperand(0); } else { @@ -3946,7 +3993,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, - unsigned NegOpcode, SDLoc DL) { + unsigned NegOpcode, const SDLoc &DL) { // fold (or (shl x, (*ext y)), // (srl x, (*ext (sub 32, y)))) -> // (rotl x, y) or (rotr x, (sub 32, y)) @@ -3967,7 +4014,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return nullptr; @@ -4093,12 +4140,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
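MatchRotate, whose SDLoc parameters are constified above, recognizes the classic OR-of-opposite-shifts idiom and, when the target supports it, emits a single rotate. A standalone check of the idiom for i32 (rotl32 is an invented helper here, written to sidestep the undefined shift-by-32 case):

// Standalone check; not part of the patch.
#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t x, unsigned r) {
  r &= 31;
  return r ? ((x << r) | (x >> (32 - r))) : x;  // avoid UB when r == 0
}

int main() {
  uint32_t x = 0x80000001;
  for (unsigned y = 1; y < 32; ++y) {
    // The idiom MatchRotate looks for: (or (shl x, y), (srl x, 32 - y)).
    uint32_t idiom = (x << y) | (x >> (32 - y));
    assert(idiom == rotl32(x, y));
  }
  return 0;
}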
- if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // fold (xor x, undef) -> undef - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return N0; - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return N1; // fold (xor c1, c2) -> c1^c2 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); @@ -4106,8 +4153,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); // canonicalize constant to RHS - if (isConstantIntBuildVectorOrConstantInt(N0) && - !isConstantIntBuildVectorOrConstantInt(N1)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (isNullConstant(N1)) @@ -4342,8 +4389,8 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = + distributeTruncateThroughAnd(N->getOperand(1).getNode())) return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), N->getOperand(0), NewOp1); } @@ -4398,7 +4445,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && N1C->isNullValue()) return N0; // fold (shl undef, x) -> 0 - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), @@ -4407,8 +4454,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } @@ -4541,7 +4587,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { APInt Val; if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && (isa<ConstantSDNode>(N0.getOperand(1)) || - isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { + ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); @@ -4637,7 +4683,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); // Determine the residual right-shift amount. - signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); + int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); // If the shift is not a no-op (in which case this should be just a sign // extend already), the truncated to type is legal, sign_extend is legal @@ -4664,8 +4710,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 
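The (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))) fold referenced above depends on truncation distributing over AND: truncation keeps only low bits, and AND acts bitwise, so the order is irrelevant. A one-assert standalone check of that piece:

// Standalone check; not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t y = 0x123456789ABCDEF0, c = 0x3F;
  // (trunc (and y, c)) == (and (trunc y), (trunc c)) for i64 -> i32.
  assert((uint32_t)(y & c) == ((uint32_t)y & (uint32_t)c));
  return 0;
}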
if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); } @@ -4916,7 +4961,7 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { EVT VT = N->getValueType(0); // fold (bswap c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); // fold (bswap (bswap x)) -> x if (N0.getOpcode() == ISD::BSWAP) @@ -4924,12 +4969,21 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (bitreverse (bitreverse x)) -> x + if (N0.getOpcode() == ISD::BITREVERSE) + return N0.getOperand(0); + return SDValue(); +} + SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4939,7 +4993,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctlz_zero_undef c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4949,7 +5003,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4959,7 +5013,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { EVT VT = N->getValueType(0); // fold (cttz_zero_undef c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4969,15 +5023,15 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } /// \brief Generate Min/Max node -static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, - SDValue True, SDValue False, +static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG) { if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) @@ -5237,7 +5291,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { // length of the BV and see if all the non-undef nodes are the same. 
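combineMinNumMaxNum, whose signature gains a const SDLoc & above, rewrites a compare-plus-select into FMINNUM/FMAXNUM; the select only matches the math operation when no NaN is involved. A standalone sketch of the correspondence under that explicit no-NaN assumption:

// Standalone check under a no-NaN assumption; not part of the patch.
#include <cassert>
#include <cmath>

int main() {
  double a = 1.5, b = -2.25;  // neither input is NaN
  // (select (setcc a, b, olt), a, b) behaves as a minimum here.
  double selMin = (a < b) ? a : b;
  assert(selMin == std::fmin(a, b));
  double selMax = (a > b) ? a : b;
  assert(selMax == std::fmax(a, b));
  return 0;
}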
ConstantSDNode *BottomHalf = nullptr; for (int i = 0; i < NumElems / 2; ++i) { - if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + if (Cond->getOperand(i)->isUndef()) continue; if (BottomHalf == nullptr) @@ -5249,7 +5303,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { // Do the same for the second half of the BuildVector ConstantSDNode *TopHalf = nullptr; for (int i = NumElems / 2; i < NumElems; ++i) { - if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + if (Cond->getOperand(i)->isUndef()) continue; if (TopHalf == nullptr) @@ -5666,9 +5720,8 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // Determine if the condition we're dealing with is constant - SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), - N0, N1, CC, SDLoc(N), false); - if (SCC.getNode()) { + if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, + CC, SDLoc(N), false)) { AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { @@ -5676,7 +5729,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // cond always true -> true val else return N3; // cond always false -> false val - } else if (SCC->getOpcode() == ISD::UNDEF) { + } else if (SCC->isUndef()) { // When the condition is UNDEF, just return the first operand. This is // coherent the DAG creation, no setcc node is created in this case return N2; @@ -5729,7 +5782,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, EVT VT = N->getValueType(0); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || - Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) && "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 @@ -5756,7 +5810,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, for (unsigned i=0; i != NumElts; ++i) { SDValue Op = N0->getOperand(i); - if (Op->getOpcode() == ISD::UNDEF) { + if (Op->isUndef()) { Elts.push_back(DAG.getUNDEF(SVT)); continue; } @@ -5771,7 +5825,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); } - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); + return DAG.getBuildVector(VT, DL, Elts).getNode(); } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: @@ -5839,8 +5893,8 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, } void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, - SDValue Trunc, SDValue ExtLoad, SDLoc DL, - ISD::NodeType ExtType) { + SDValue Trunc, SDValue ExtLoad, + const SDLoc &DL, ISD::NodeType ExtType) { // Extend SetCC uses if necessary. 
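tryToFoldExtendOfConstant, extended elsewhere in this patch to cover ZERO_EXTEND_VECTOR_INREG, works because extending a constant build_vector is just extending each non-undef element independently. A standalone model of that per-element step (sext i16 -> i32 in plain C++; the array names are invented):

// Standalone model; not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  int16_t elts[4] = {-1, 42, -32768, 7};  // a constant "build_vector"
  int32_t ext[4];
  for (int i = 0; i != 4; ++i)
    ext[i] = (int32_t)elts[i];  // per-element sign extension
  assert(ext[0] == -1 && ext[2] == -32768 && ext[3] == 7);
  return 0;
}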
for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { SDNode *SetCC = SetCCs[i]; @@ -5929,9 +5983,8 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { SDValue SplitLoad = DAG.getExtLoad( ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, - LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, - LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), - Align, LN0->getAAInfo()); + LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, DAG.getConstant(Stride, DL, BasePtr.getValueType())); @@ -6145,16 +6198,30 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) - unsigned ElementWidth = VT.getScalarType().getSizeInBits(); + // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) + // Here, T can be 1 or -1, depending on the type of the setcc and + // getBooleanContents(). + unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits(); + SDLoc DL(N); - SDValue NegOne = - DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT); - SDValue SCC = - SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), - NegOne, DAG.getConstant(0, DL, VT), - cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); - if (SCC.getNode()) return SCC; + // To determine the "true" side of the select, we need to know the high bit + // of the value returned by the setcc if it evaluates to true. + // If the type of the setcc is i1, then the true case of the select is just + // sext(i1 1), that is, -1. + // If the type of the setcc is larger (say, i8) then the value of the high + // bit depends on getBooleanContents(). So, ask TLI for a real "true" value + // of the appropriate width. + SDValue ExtTrueVal = + (SetCCWidth == 1) + ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()), + DL, VT) + : TLI.getConstTrueVal(DAG, VT, DL); + + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal, + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) + return SCC; if (!VT.isVector()) { EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); @@ -6162,10 +6229,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) { SDLoc DL(N); ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - SDValue SetCC = DAG.getSetCC(DL, SetCCVT, - N0.getOperand(0), N0.getOperand(1), CC); - return DAG.getSelect(DL, VT, SetCC, - NegOne, DAG.getConstant(0, DL, VT)); + SDValue SetCC = + DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); + return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, + DAG.getConstant(0, DL, VT)); } } } @@ -6436,56 +6503,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + // Only do this before legalize for now. if (!LegalOperations && VT.isVector() && N0.getValueType().getVectorElementType() == MVT::i1) { - EVT N0VT = N0.getOperand(0).getValueType(); - if (getSetCCResultType(N0VT) == N0.getValueType()) + EVT N00VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N00VT) == N0.getValueType()) return SDValue(); - // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. - // Only do this before legalize for now. 
- EVT EltVT = VT.getVectorElementType(); + // We know that the # elements of the results is the same as the # + // elements of the compare (and the # elements of the compare result for + // that matter). Check to see that they are the same size. If so, we know + // that the element size of the sext'd result matches the element size of + // the compare operands. SDLoc DL(N); - SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), - DAG.getConstant(1, DL, EltVT)); - if (VT.getSizeInBits() == N0VT.getSizeInBits()) - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - return DAG.getNode(ISD::AND, DL, VT, - DAG.getSetCC(DL, VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - OneOps)); + SDValue VecOnes = DAG.getConstant(1, DL, VT); + if (VT.getSizeInBits() == N00VT.getSizeInBits()) { + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. + SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0), + N0.getOperand(1), N0.getOperand(2)); + return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes); + } // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then - // truncate/sign extend - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + // truncate/sign extend. 
+ EVT MatchingElementType = EVT::getIntegerVT( + *DAG.getContext(), N00VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = EVT::getVectorVT( + *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, DL, VT, - DAG.getSExtOrTrunc(VsetCC, DL, VT), - DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); + DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), N0.getOperand(2)); + return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT), + VecOnes); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDLoc DL(N); - SDValue SCC = - SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), - cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); - if (SCC.getNode()) return SCC; + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) + return SCC; } // (zext (shl (zext x), cst)) -> (shl (zext x), cst) @@ -6660,11 +6719,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDLoc DL(N); - SDValue SCC = - SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), - cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); - if (SCC.getNode()) + if (SDValue SCC = SimplifySelectCC( + DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), + DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) return SCC; } @@ -6854,15 +6912,14 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign, LN0->getAAInfo()); + Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else - Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign, LN0->getAAInfo()); + Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, + NewAlign, LN0->getMemOperand()->getFlags(), + LN0->getAAInfo()); // Replace the old load's chain with the new load's chain. WorklistRemover DeadNodes(*this); @@ -6902,7 +6959,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getUNDEF(VT); // fold (sext_in_reg c1) -> c1 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. 
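The sext_in_reg folds that follow rely on the node's usual semantics: the low EVTBits bits are treated as a signed value and their sign bit is replicated upward. A standalone sketch using the classic shl+sra expansion (this assumes the common arithmetic right shift on signed int; the helper name is invented):

// Standalone sketch; not part of the patch.
#include <cassert>
#include <cstdint>

static int32_t sextInReg(int32_t x, unsigned fromBits) {
  // Replicate the sign bit of the low 'fromBits' bits; relies on the
  // usual two's-complement arithmetic right shift.
  unsigned sh = 32 - fromBits;
  return (int32_t)((uint32_t)x << sh) >> sh;
}

int main() {
  assert(sextInReg(0x000000FF, 8) == -1);   // 0xFF as i8 is -1
  assert(sextInReg(0x0000007F, 8) == 127);  // sign bit clear: unchanged
  return 0;
}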
@@ -6988,9 +7045,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { - SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), - N0.getOperand(1), false); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } @@ -7002,7 +7058,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (N0.getOpcode() == ISD::UNDEF) + if (N0.isUndef()) + return DAG.getUNDEF(VT); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + return SDValue(); +} + +SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.isUndef()) return DAG.getUNDEF(VT); if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations)) return SDValue(Res, 0); @@ -7021,7 +7091,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getValueType() == N->getValueType(0)) return N0; // fold (truncate c1) -> c1 - if (isConstantIntBuildVectorOrConstantInt(N0)) + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -7030,12 +7100,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { + // if the source is smaller than the dest, we still need an extend. if (N0.getOperand(0).getValueType().bitsLT(VT)) - // if the source is smaller than the dest, we still need an extend - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, - N0.getOperand(0)); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); + // if the source is larger than the dest, then we just need the truncate. if (N0.getOperand(0).getValueType().bitsGT(VT)) - // if the source is larger than the dest, than we just need the truncate return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // if the source and dest are the same type, we can drop both the extend // and the truncate. @@ -7071,12 +7140,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); int Index = isLE ?
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); - SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), - NVT, N0.getOperand(0)); - SDLoc DL(N); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - DL, TrTy, V, + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, + DAG.getBitcast(NVT, N0.getOperand(0)), DAG.getConstant(Index, DL, IndexTy)); } } @@ -7094,6 +7160,25 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2 + if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && + TLI.isTypeDesirableForOp(ISD::SHL, VT)) { + if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t Amt = CAmt->getZExtValue(); + unsigned Size = VT.getSizeInBits(); + + if (Amt < Size / 2) { + SDLoc SL(N); + EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::SHL, SL, VT, Trunc, + DAG.getConstant(Amt, SL, AmtVT)); + } + } + } + // Fold a series of buildvector, bitcast, and truncate if possible. // For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to @@ -7121,7 +7206,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); + return DAG.getBuildVector(VT, SDLoc(N), Opnds); } } @@ -7131,10 +7216,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Currently we only perform this optimization on scalars because vectors // may have different active low bits. if (!VT.isVector()) { - SDValue Shorter = - GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), - VT.getSizeInBits())); - if (Shorter.getNode()) + if (SDValue Shorter = + GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), + VT.getSizeInBits()))) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } // fold (truncate (load x)) -> (smaller load x) @@ -7168,7 +7252,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { SDValue X = N0.getOperand(i); - if (X.getOpcode() != ISD::UNDEF) { + if (!X.isUndef()) { V = X; Idx = i; NumDefs++; @@ -7200,6 +7284,24 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // Fold truncate of a bitcast of a vector to an extract of the low vector + // element. + // + // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0 + if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { + SDValue VecSrc = N0.getOperand(0); + EVT SrcVT = VecSrc.getValueType(); + if (SrcVT.isVector() && SrcVT.getScalarType() == VT && + (!LegalOperations || + TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) { + SDLoc SL(N); + + EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, + VecSrc, DAG.getConstant(0, SL, IdxVT)); + } + } + // Simplify the operands using demanded-bits information. if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) @@ -7226,23 +7328,17 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); - - if (ISD::isNON_EXTLoad(LD2) && - LD2->hasOneUse() && - // If both are volatile this would reduce the number of volatile loads. - // If one is volatile it might be ok, but play conservative and bail out. 
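The new trunc (shl x, K) -> shl (trunc x), K fold above is gated on K < vt.size / 2 purely for profitability; the underlying bit identity holds for any K below the destination width. A standalone check on the low 32 bits:

// Standalone check; not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t x = 0xFEDCBA9876543210;
  for (unsigned K = 0; K < 16; ++K) {  // the patch requires K < 32 / 2
    // trunc (shl x, K) == shl (trunc x), K when only low 32 bits survive.
    assert((uint32_t)(x << K) == ((uint32_t)x << K));
  }
  return 0;
}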
- !LD1->isVolatile() && - !LD2->isVolatile() && - DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { + unsigned LD1Bytes = LD1VT.getSizeInBits() / 8; + if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && + DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { unsigned Align = LD1->getAlignment(); unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), - LD1->getBasePtr(), LD1->getPointerInfo(), - false, false, false, Align); + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), + LD1->getPointerInfo(), Align); } return SDValue(); @@ -7254,6 +7350,49 @@ static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { return DAG.getDataLayout().isBigEndian() ? 1 : 0; } +static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + // If this is not a bitcast to an FP type or if the target doesn't have + // IEEE754-compliant FP logic, we're done. + EVT VT = N->getValueType(0); + if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT)) + return SDValue(); + + // TODO: Use splat values for the constant-checking below and remove this + // restriction. + SDValue N0 = N->getOperand(0); + EVT SourceVT = N0.getValueType(); + if (SourceVT.isVector()) + return SDValue(); + + unsigned FPOpcode; + APInt SignMask; + switch (N0.getOpcode()) { + case ISD::AND: + FPOpcode = ISD::FABS; + SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits()); + break; + case ISD::XOR: + FPOpcode = ISD::FNEG; + SignMask = APInt::getSignBit(SourceVT.getSizeInBits()); + break; + // TODO: ISD::OR --> ISD::FNABS? + default: + return SDValue(); + } + + // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X + // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X + SDValue LogicOp0 = N0.getOperand(0); + ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask && + LogicOp0.getOpcode() == ISD::BITCAST && + LogicOp0->getOperand(0).getValueType() == VT) + return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0)); + + return SDValue(); +} + SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7284,13 +7423,12 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { TLI.isOperationLegal(ISD::ConstantFP, VT)) || (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && TLI.isOperationLegal(ISD::Constant, VT))) - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); + return DAG.getBitcast(VT, N0); } // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, - N0.getOperand(0)); + return DAG.getBitcast(VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) // If the resultant load doesn't need a higher alignment than the original! 
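The new foldBitcastedFPLogic hook above rests on a plain IEEE-754 bit identity: clearing the sign bit of a value's bit pattern is fabs, and flipping it is fneg. A minimal standalone C++ sketch of that identity (the bitsOf/fromBits helpers are hypothetical stand-ins for the DAG-level bitcasts, not LLVM API):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint32_t bitsOf(float F) {
  uint32_t B;
  std::memcpy(&B, &F, sizeof(B)); // models (bitcast fp X to int)
  return B;
}

static float fromBits(uint32_t B) {
  float F;
  std::memcpy(&F, &B, sizeof(F)); // models (bitcast int ... to fp)
  return F;
}

int main() {
  float X = -1.5f;
  // (bitcast (and (bitcast X), 0x7fffffff)) == fabs(X): sign bit cleared.
  assert(fromBits(bitsOf(X) & 0x7fffffffu) == std::fabs(X));
  // (bitcast (xor (bitcast X), 0x80000000)) == -X: sign bit flipped.
  assert(fromBits(bitsOf(X) ^ 0x80000000u) == -X);
  return 0;
}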
@@ -7303,21 +7441,24 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - unsigned Align = DAG.getDataLayout().getABITypeAlignment( - VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); - if (Align <= OrigAlign) { - SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), OrigAlign, - LN0->getAAInfo()); + bool Fast = false; + if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + LN0->getAddressSpace(), OrigAlign, &Fast) && + Fast) { + SDValue Load = + DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), + LN0->getPointerInfo(), OrigAlign, + LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; } } + if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI)) + return V; + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // @@ -7334,8 +7475,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { - SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, - N0.getOperand(0)); + SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(NewConv.getNode()); SDLoc DL(N); @@ -7388,8 +7528,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { - SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), - IntXVT, N0.getOperand(1)); + SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); AddToWorklist(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. 
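Most of the mechanical churn in these hunks is the switch from DAG.getNode(ISD::BITCAST, SDLoc(...), VT, V) to DAG.getBitcast(VT, V). Assuming the helper in this tree matches the usual SelectionDAG definition, it is roughly the following convenience wrapper (a sketch; the exact body is an assumption):

// Hedged sketch of SelectionDAG::getBitcast as used throughout this patch;
// the precise implementation in this tree may differ.
SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) {
  if (V.getValueType() == VT)
    return V; // a bitcast to the same type is a no-op
  return getNode(ISD::BITCAST, SDLoc(V), VT, V);
}

The win is mainly readability: the wrapper picks the SDLoc from its operand and spares each call site the spelled-out three-argument getNode form.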
@@ -7412,11 +7551,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); - SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT, - N0.getOperand(0)); + SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(Cst.getNode()); - SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT, - N0.getOperand(1)); + SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); AddToWorklist(X.getNode()); SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); AddToWorklist(XorResult.getNode()); @@ -7439,8 +7576,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); - SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), - VT, N0.getOperand(0)); + SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); AddToWorklist(Cst.getNode()); @@ -7472,7 +7608,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { return SDValue(Op.getOperand(0)); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + return DAG.getBitcast(VT, Op); return SDValue(); }; @@ -7529,8 +7665,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // we can end up with a scalar-to-vector node here. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, - DAG.getNode(ISD::BITCAST, SDLoc(BV), - DstEltVT, BV->getOperand(0))); + DAG.getBitcast(DstEltVT, BV->getOperand(0))); SmallVector<SDValue, 8> Ops; for (SDValue Op : BV->op_values()) { @@ -7538,11 +7673,10 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); - Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), - DstEltVT, Op)); + Ops.push_back(DAG.getBitcast(DstEltVT, Op)); AddToWorklist(Ops.back().getNode()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + return DAG.getBuildVector(VT, SDLoc(BV), Ops); } // Otherwise, we're growing or shrinking the elements. To avoid having to @@ -7584,7 +7718,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Shift the previously computed bits over. NewBits <<= SrcBitSize; SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); - if (Op.getOpcode() == ISD::UNDEF) continue; + if (Op.isUndef()) continue; EltIsUndef = false; NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). 
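The element-merging path of ConstantFoldBITCASTofBUILD_VECTOR shown above packs SrcBitSize-wide constants into a wider integer, visiting the inputs in reverse on little-endian targets so that element 0 lands in the low bits. A standalone C++ illustration of that packing (the example values are ours, not from the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Model bitcasting a constant <2 x i16> build_vector to <1 x i32>.
  const uint16_t Elts[2] = {0x1122, 0x3344};
  const unsigned SrcBitSize = 16, NumInputsPerOutput = 2;

  // Little-endian: shift previously gathered bits up and walk the inputs in
  // reverse, so Elts[0] ends up in the low bits of the output element.
  uint32_t NewBits = 0;
  for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
    NewBits <<= SrcBitSize;
    NewBits |= Elts[NumInputsPerOutput - j - 1];
  }
  assert(NewBits == 0x33441122u);

  // Big-endian: walk the inputs in order, so Elts[0] lands in the high bits.
  uint32_t BigBits = 0;
  for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
    BigBits <<= SrcBitSize;
    BigBits |= Elts[j];
  }
  assert(BigBits == 0x11223344u);
  return 0;
}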
@@ -7598,7 +7732,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + return DAG.getBuildVector(VT, DL, Ops); } // Finally, this must be the case where we are shrinking elements: each input @@ -7609,7 +7743,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { SmallVector<SDValue, 8> Ops; for (const SDValue &Op : BV->op_values()) { - if (Op.getOpcode() == ISD::UNDEF) { + if (Op.isUndef()) { Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); continue; } @@ -7628,7 +7762,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + return DAG.getBuildVector(VT, DL, Ops); } /// Try to perform FMA combining on a given FADD node. @@ -7654,6 +7788,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); + + if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel)) + return SDValue(); + // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); @@ -7837,6 +7976,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); + if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel)) + return SDValue(); + // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); @@ -8305,7 +8448,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { AddToWorklist(Fused.getNode()); return Fused; } - return SDValue(); } @@ -8662,7 +8804,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { // Compute the reciprocal 1.0 / c2. - APFloat N1APF = N1CFP->getValueAPF(); + const APFloat &N1APF = N1CFP->getValueAPF(); APFloat Recip(N1APF.getSemantics(), 1); // 1.0 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); // Only do the transform if the reciprocal is a legal fp immediate that @@ -8681,12 +8823,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); @@ -8694,7 +8836,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); @@ -8715,7 +8857,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SqrtOp.getNode()) { // We found a FSQRT, so try to make this fold: // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) - if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); @@ -8772,27 +8914,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // For now, create a Flags object for use with all unsafe math transforms. SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); - - // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags); - if (!RV) - return SDValue(); - - EVT VT = RV.getValueType(); - SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags); - AddToWorklist(RV.getNode()); - - // Unfortunately, RV is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - EVT CCVT = getSetCCResultType(VT); - SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - AddToWorklist(RV.getNode()); - - return DAG.getNode(VT.isVector() ? 
ISD::VSELECT : ISD::SELECT, DL, VT, - ZeroCmp, Zero, RV); + return buildSqrtEstimate(N->getOperand(0), &Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -8868,7 +8990,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (isConstantIntBuildVectorOrConstantInt(N0) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -8922,7 +9044,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (isConstantIntBuildVectorOrConstantInt(N0) && + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) @@ -8993,9 +9115,7 @@ static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { } if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); - if (SrcVT == VT) - return Src; - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src); + return DAG.getBitcast(VT, Src); } return SDValue(); } @@ -9040,6 +9160,17 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { if (N0.getOpcode() == ISD::FP_ROUND) { const bool NIsTrunc = N->getConstantOperandVal(1) == 1; const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + + // Skip this folding if it results in an fp_round from f80 to f16. + // + // f80 to f16 always generates an expensive (and as yet, unimplemented) + // libcall to __truncxfhf2 instead of selecting native f16 conversion + // instructions from f32 or f64. Moreover, the first (value-preserving) + // fp_round from f80 to either f32 or f64 may become a NOP in platforms like + // x86. + if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16) + return SDValue(); + // If the first fp_round isn't a value preserving truncation, it might // introduce a tie in the second fp_round, that wouldn't occur in the // single-step fp_round we want to fold to. @@ -9198,7 +9329,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, DAG.getConstant(SignMask, DL0, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); + return DAG.getBitcast(VT, Int); } } @@ -9303,7 +9434,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { Int = DAG.getNode(ISD::AND, DL, IntVT, Int, DAG.getConstant(SignMask, DL, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); + return DAG.getBitcast(N->getValueType(0), Int); } } @@ -9607,6 +9738,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; } + // Caches for hasPredecessorHelper. + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + Worklist.push_back(N); + // If the offset is a constant, there may be other adds of constants that // can be folded with this one. We should do this to avoid having to keep // a copy of the original base pointer. 
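Hoisting the Visited/Worklist pair to the top of CombineToPreIndexedLoadStore lets every hasPredecessorHelper query below share a single walk of N's operand cone instead of restarting it per use. A standalone sketch of that amortized search pattern (plain C++, not the LLVM API):

#include <cassert>
#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Operands; // edges toward a node's operands
};

// True if Candidate is reachable through operands from the seeded nodes.
// Visited and Worklist persist across calls, so repeated queries keep
// extending one cached exploration rather than re-walking the graph.
static bool hasPredecessor(const Node *Candidate,
                           std::unordered_set<const Node *> &Visited,
                           std::vector<const Node *> &Worklist) {
  if (Visited.count(Candidate))
    return true;
  while (!Worklist.empty()) {
    const Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // already explored by an earlier query
    for (const Node *Op : N->Operands)
      Worklist.push_back(Op);
    if (N == Candidate)
      return true;
  }
  return false;
}

int main() {
  Node A, B, C, D;
  C.Operands = {&B}; // C uses B
  B.Operands = {&A}; // B uses A
  std::unordered_set<const Node *> Visited;
  std::vector<const Node *> Worklist{&C}; // seed once, as the combiner does
  assert(hasPredecessor(&A, Visited, Worklist));  // walks C, B, A once
  assert(!hasPredecessor(&D, Visited, Worklist)); // cone already exhausted
  return 0;
}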
@@ -9621,7 +9757,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (Use.getUser() == Ptr.getNode() || Use != BasePtr) continue; - if (Use.getUser()->isPredecessorOf(N)) + if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist)) continue; if (Use.getUser()->getOpcode() != ISD::ADD && @@ -9651,14 +9787,10 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Now check for #3 and #4. bool RealUse = false; - // Caches for hasPredecessorHelper - SmallPtrSet<const SDNode *, 32> Visited; - SmallVector<const SDNode *, 16> Worklist; - for (SDNode *Use : Ptr.getNode()->uses()) { if (Use == N) continue; - if (N->hasPredecessorHelper(Use, Visited, Worklist)) + if (SDNode::hasPredecessorHelper(Use, Visited, Worklist)) return false; // If Ptr may be folded in addressing mode of other use, then it's @@ -9720,7 +9852,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { ConstantSDNode *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); int X0, X1, Y0, Y1; - APInt Offset0 = CN->getAPIntValue(); + const APInt &Offset0 = CN->getAPIntValue(); APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; @@ -9984,13 +10116,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getMemOperand()->getBaseAlignment()) { - SDValue NewLoad = - DAG.getExtLoad(LD->getExtensionType(), SDLoc(N), - LD->getValueType(0), - Chain, Ptr, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), Align, LD->getAAInfo()); + SDValue NewLoad = DAG.getExtLoad( + LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), LD->getMemoryVT(), Align, + LD->getMemOperand()->getFlags(), LD->getAAInfo()); if (NewLoad.getNode() != N) return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } @@ -10208,7 +10337,7 @@ struct LoadedSlice { return false; // Offsets are for indexed load only, we do not handle that. - if (Origin->getOffset().getOpcode() != ISD::UNDEF) + if (!Origin->getOffset().isUndef()) return false; const TargetLowering &TLI = DAG->getTargetLoweringInfo(); @@ -10291,10 +10420,10 @@ struct LoadedSlice { EVT SliceType = getLoadedType(); // Create the load for the slice. - SDValue LastInst = DAG->getLoad( - SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, - Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), - Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); + SDValue LastInst = + DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, + Origin->getPointerInfo().getWithOffset(Offset), + getAlignment(), Origin->getMemOperand()->getFlags()); // If the final type is not the same as the loaded type, this means that // we have to pad with zero. Create a zero extend for that. 
EVT FinalType = Inst->getValueType(0); @@ -10718,9 +10847,10 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); ++OpsNarrowed; - return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), - false, false, NewAlign).getNode(); + return DAG + .getStore(St->getChain(), SDLoc(St), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), NewAlign) + .getNode(); } @@ -10826,19 +10956,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { Ptr.getValueType(), Ptr, DAG.getConstant(PtrOff, SDLoc(LD), Ptr.getValueType())); - SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), - LD->getChain(), NewPtr, - LD->getPointerInfo().getWithOffset(PtrOff), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), NewAlign, - LD->getAAInfo()); + SDValue NewLD = + DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, + LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, + LD->getMemOperand()->getFlags(), LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, SDLoc(Value), NewVT)); - SDValue NewST = DAG.getStore(Chain, SDLoc(N), - NewVal, NewPtr, - ST->getPointerInfo().getWithOffset(PtrOff), - false, false, NewAlign); + SDValue NewST = + DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, + ST->getPointerInfo().getWithOffset(PtrOff), NewAlign); AddToWorklist(NewPtr.getNode()); AddToWorklist(NewLD.getNode()); @@ -10887,15 +11014,13 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); - SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), - LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - false, false, false, LDAlign); + SDValue NewLD = + DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), LDAlign); - SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), - NewLD, ST->getBasePtr(), - ST->getPointerInfo(), - false, false, STAlign); + SDValue NewST = + DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(), + ST->getPointerInfo(), STAlign); AddToWorklist(NewLD.getNode()); AddToWorklist(NewST.getNode()); @@ -10940,9 +11065,23 @@ struct BaseIndexOffset { } /// Parses tree in Ptr for base, index, offset addresses. - static BaseIndexOffset match(SDValue Ptr) { + static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) { bool IsIndexSignExt = false; + // Split up a folded GlobalAddress+Offset into its component parts. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr)) + if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) { + return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(), + SDLoc(GA), + GA->getValueType(0), + /*Offset=*/0, + /*isTargetGA=*/false, + GA->getTargetFlags()), + SDValue(), + GA->getOffset(), + IsIndexSignExt); + } + // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD // instruction, then it could be just the BASE or everything else we don't // know how to handle. Just use Ptr as BASE and give up. @@ -11063,7 +11202,7 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, // multiply (CONST * A) after we also do the same transformation // to the "t2" instruction. 
if (OtherOp->getOpcode() == ISD::ADD && - isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && + DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && OtherOp->getOperand(0).getNode() == MulVar) return true; } @@ -11073,11 +11212,9 @@ return false; } -SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, - SDLoc SL, - ArrayRef<MemOpLink> Stores, - SmallVectorImpl<SDValue> &Chains, - EVT Ty) const { +SDValue DAGCombiner::getMergedConstantVectorStore( + SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, EVT Ty) const { SmallVector<SDValue, 8> BuildVector; for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { @@ -11086,7 +11223,7 @@ SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, BuildVector.push_back(St->getValue()); } - return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); + return DAG.getBuildVector(Ty, SL, BuildVector); } bool DAGCombiner::MergeStoresOfConstantsOrVecElts( @@ -11182,29 +11319,36 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), - false, false, FirstInChain->getAlignment()); - // Replace the last store with the new store - CombineTo(LatestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumStores; ++i) { - if (StoreNodes[i].MemNode == LatestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - // ReplaceAllUsesWith will replace all uses that existed when it was - // called, but graph optimizations may cause new ones to appear. For - // example, the case in pr14333 looks like - // - // St's chain -> St -> another store -> X - // - // And the only difference from St to the other store is the chain. - // When we change it's chain to be St's chain they become identical, - // get CSEed and the net result is that X is now a use of St. - // Since we know that St is redundant, just iterate. - while (!St->use_empty()) - DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + if (UseAA) { + // Replace all merged stores with the new store. + for (unsigned i = 0; i < NumStores; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); + } else { + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumStores; ++i) { + if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change its chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate.
+ while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } } return true; @@ -11215,14 +11359,14 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.Base.getNode()) return; // Do not handle stores to undef base pointers. - if (BasePtr.Base.getOpcode() == ISD::UNDEF) + if (BasePtr.Base.isUndef()) return; // Walk up the chain and look for nodes with offsets from the same @@ -11253,7 +11397,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( if (OtherST->getMemoryVT() != MemVT) continue; - BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG); if (Ptr.equalBaseIndex(BasePtr)) StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); @@ -11269,7 +11413,7 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. if (!Ptr.equalBaseIndex(BasePtr)) @@ -11280,9 +11424,8 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( break; // No truncation. - if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) - if (St->isTruncatingStore()) - break; + if (Index->isTruncatingStore()) + break; // The stored memory type must be the same. if (Index->getMemoryVT() != MemVT) @@ -11326,6 +11469,30 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( } } +// We need to check that merging these stores does not cause a loop +// in the DAG. Any store candidate may depend on another candidate +// indirectly through its operand (we already consider dependencies +// through the chain). Check in parallel by searching up from +// non-chain operands of candidates. +bool DAGCombiner::checkMergeStoreCandidatesForDependencies( + SmallVectorImpl<MemOpLink> &StoreNodes) { + SmallPtrSet<const SDNode *, 16> Visited; + SmallVector<const SDNode *, 8> Worklist; + // Search the operands of the store candidates. + for (unsigned i = 0; i < StoreNodes.size(); ++i) { + SDNode *n = StoreNodes[i].MemNode; + // Potential loops may happen only through non-chain operands. + for (unsigned j = 1; j < n->getNumOperands(); ++j) + Worklist.push_back(n->getOperand(j).getNode()); + } + // Search through the DAG. We can stop early if we find a store node. + for (unsigned i = 0; i < StoreNodes.size(); ++i) { + if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist)) + return false; + } + return true; +} + bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (OptLevel == CodeGenOpt::None) return false; @@ -11379,6 +11546,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (StoreNodes.size() < 2) return false; + // Only do the dependence check in the AA case. + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes)) + return false; + // Sort the memory operands according to their distance from the // base pointer.
As a secondary criterion: make sure stores coming later in the code come first in the list. This is important for @@ -11557,7 +11730,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Ld->getMemoryVT() != MemVT) break; - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); // If this is not the first ptr that we check. if (LdBasePtr.Base.getNode()) { // The base ptr must be the same. @@ -11690,16 +11863,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // The merged loads are required to have the same incoming chain, so // using the first's chain is acceptable. - SDValue NewLoad = DAG.getLoad( - JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), + FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoadAlign); SDValue NewStoreChain = DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); - SDValue NewStore = DAG.getStore( - NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); + SDValue NewStore = + DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign); // Transfer chain users from old loads to the new load. for (unsigned i = 0; i < NumElem; ++i) { @@ -11708,16 +11881,22 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { SDValue(NewLoad.getNode(), 1)); } - // Replace the last store with the new store. - CombineTo(LatestOp, NewStore); - // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { - // Remove all Store nodes. - if (StoreNodes[i].MemNode == LatestOp) - continue; - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); - deleteAndRecombine(St); + if (UseAA) { + // Replace all of the stores with the new store. + for (unsigned i = 0; i < NumElem; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); + } else { + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem; ++i) { + // Remove all Store nodes.
+ if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } } return true; @@ -11808,21 +11987,17 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); - SDValue St0 = DAG.getStore(Chain, DL, Lo, - Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, - ST->getAlignment(), AAInfo); + SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), + ST->getAlignment(), MMOFlags, AAInfo); Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, DAG.getConstant(4, DL, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); - SDValue St1 = DAG.getStore(Chain, DL, Hi, - Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, - Alignment, AAInfo); + SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, + ST->getPointerInfo().getWithOffset(4), + Alignment, MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, St0, St1); } @@ -11841,21 +12016,24 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // resultant store does not need a higher alignment than the original. if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && ST->isUnindexed()) { - unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); - unsigned Align = DAG.getDataLayout().getABITypeAlignment( - SVT.getTypeForEVT(*DAG.getContext())); - if (Align <= OrigAlign && - ((!LegalOperations && !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) - return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), - Ptr, ST->getPointerInfo(), ST->isVolatile(), - ST->isNonTemporal(), OrigAlign, - ST->getAAInfo()); + if (((!LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) && + TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) { + unsigned OrigAlign = ST->getAlignment(); + bool Fast = false; + if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT, + ST->getAddressSpace(), OrigAlign, &Fast) && + Fast) { + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + ST->getPointerInfo(), OrigAlign, + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + } + } } // Turn 'store undef, Ptr' -> nothing. - if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) + if (Value.isUndef() && ST->isUnindexed()) return Chain; // Try to infer better alignment information than the store already has. @@ -11863,10 +12041,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) { SDValue NewStore = - DAG.getTruncStore(Chain, SDLoc(N), Value, - Ptr, ST->getPointerInfo(), ST->getMemoryVT(), - ST->isVolatile(), ST->isNonTemporal(), Align, - ST->getAAInfo()); + DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), + ST->getMemoryVT(), Align, + ST->getMemOperand()->getFlags(), ST->getAAInfo()); if (NewStore.getNode() != N) return CombineTo(ST, NewStore, true); } @@ -11898,6 +12075,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // manipulation. Return the original node to not do anything else. return SDValue(ST, 0); } + Chain = ST->getChain(); } // Try transforming N to an indexed store. 
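The Lo/Hi split done by replaceStoreOfFPConstant above is just the 64-bit constant's two 32-bit words, stored at byte offsets 0 and 4 and swapped first on big-endian targets. A standalone C++ illustration of the arithmetic (the constant 1.0 is an arbitrary example of ours):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  const double D = 1.0; // bit pattern 0x3FF0000000000000
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));

  uint32_t Lo = static_cast<uint32_t>(Bits);       // store at Ptr + 0
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32); // store at Ptr + 4
  assert(Lo == 0x00000000u && Hi == 0x3FF00000u);

  // Reassembling the two words recovers the original pattern exactly, which
  // is why splitting the FP-constant store is always value-preserving. The
  // second store's alignment is clamped with MinAlign(Alignment, 4) because
  // its address is only known to be 4 bytes past the first.
  assert(((static_cast<uint64_t>(Hi) << 32) | Lo) == Bits);
  return 0;
}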
@@ -12001,7 +12179,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); // If the inserted element is an UNDEF, just use the input vector. - if (InVal.getOpcode() == ISD::UNDEF) + if (InVal.isUndef()) return InVec; EVT VT = InVec.getValueType(); @@ -12045,7 +12223,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { Ops.append(InVec.getNode()->op_begin(), InVec.getNode()->op_end()); - } else if (InVec.getOpcode() == ISD::UNDEF) { + } else if (InVec.isUndef()) { unsigned NElts = VT.getVectorNumElements(); Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); } else { @@ -12065,11 +12243,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } // Return the new vector - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + return DAG.getBuildVector(VT, dl, Ops); } SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { + assert(!OriginalLoad->isVolatile()); + EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); unsigned Align = OriginalLoad->getAlignment(); @@ -12115,21 +12295,20 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( VecEltVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad( - ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, - VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), - OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, + OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, + Align, OriginalLoad->getMemOperand()->getFlags(), + OriginalLoad->getAAInfo()); Chain = Load.getValue(1); } else { - Load = DAG.getLoad( - VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, - OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), - OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, + MPI, Align, OriginalLoad->getMemOperand()->getFlags(), + OriginalLoad->getAAInfo()); Chain = Load.getValue(1); if (ResultVT.bitsLT(VecEltVT)) Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); else - Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); + Load = DAG.getBitcast(ResultVT, Load); } WorklistRemover DeadNodes(*this); SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; @@ -12183,6 +12362,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // converts. } + // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x) + if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() && + ConstEltNo->isNullValue() && VT.isInteger()) { + SDValue BCSrc = InVec.getOperand(0); + if (BCSrc.getValueType().isScalarInteger()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); + } + + // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val + // + // This only really matters if the index is non-constant since other combines + // on the constant elements already work. + if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && + EltNo == InVec.getOperand(2)) { + SDValue Elt = InVec.getOperand(1); + return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt; + } + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
// We only perform this optimization before the op legalization phase because // we may introduce new vector instructions which are not backed by TD @@ -12256,9 +12453,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ISD::isNormalLoad(InVec.getNode()) && !N->getOperand(1)->hasPredecessor(InVec.getNode())) { SDValue Index = N->getOperand(1); - if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) - return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, - OrigLoad); + if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) { + if (!OrigLoad->isVolatile()) { + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, + OrigLoad); + } + } } // Perform only after legalization to ensure build_vector / vector_shuffle @@ -12358,7 +12558,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); // Ignore undef inputs. - if (In.getOpcode() == ISD::UNDEF) continue; + if (In.isUndef()) continue; bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; @@ -12413,9 +12613,9 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { SDValue Cast = N->getOperand(i); assert((Cast.getOpcode() == ISD::ANY_EXTEND || Cast.getOpcode() == ISD::ZERO_EXTEND || - Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + Cast.isUndef()) && "Invalid cast opcode"); SDValue In; - if (Cast.getOpcode() == ISD::UNDEF) + if (Cast.isUndef()) In = DAG.getUNDEF(SourceType); else In = Cast->getOperand(0); @@ -12434,12 +12634,12 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); + SDValue BV = DAG.getBuildVector(VecVT, dl, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. AddToWorklist(BV.getNode()); // Bitcast to the desired type. - return DAG.getNode(ISD::BITCAST, dl, VT, BV); + return DAG.getBitcast(VT, BV); } SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { @@ -12502,12 +12702,12 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); - if (In.getOpcode() == ISD::UNDEF) + if (In.isUndef()) Opnds.push_back(DAG.getUNDEF(SrcVT)); else Opnds.push_back(In.getOperand(0)); } - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds); + SDValue BV = DAG.getBuildVector(NVT, dl, Opnds); AddToWorklist(BV.getNode()); return DAG.getNode(Opcode, dl, VT, BV); @@ -12545,7 +12745,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i != NumInScalars; ++i) { SDValue Op = N->getOperand(i); // Ignore undef inputs. - if (Op.getOpcode() == ISD::UNDEF) continue; + if (Op.isUndef()) continue; // See if we can combine this build_vector into a blend with a zero vector. 
if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { @@ -12681,7 +12881,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SDValue Ops[2]; Ops[0] = VecIn1; Ops[1] = VecIn2; - return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]); + return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask); } return SDValue(); @@ -12735,18 +12935,17 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { for (SDValue &Op : Ops) { if (Op.getValueType() == SVT) continue; - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) Op = ScalarUndef; else - Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + Op = DAG.getBitcast(SVT, Op); } } } EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, VT.getSizeInBits() / SVT.getSizeInBits()); - return DAG.getNode(ISD::BITCAST, DL, VT, - DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); + return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); } // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR @@ -12768,7 +12967,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { Op = Op.getOperand(0); // UNDEF nodes convert to UNDEF shuffle mask values. - if (Op.getOpcode() == ISD::UNDEF) { + if (Op.isUndef()) { Mask.append((unsigned)NumOpElts, -1); continue; } @@ -12788,7 +12987,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { ExtVec = ExtVec.getOperand(0); // UNDEF nodes convert to UNDEF shuffle mask values. - if (ExtVec.getOpcode() == ISD::UNDEF) { + if (ExtVec.isUndef()) { Mask.append((unsigned)NumOpElts, -1); continue; } @@ -12812,11 +13011,11 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { return SDValue(); // At most we can reference 2 inputs in the final shuffle. - if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) { + if (SV0.isUndef() || SV0 == ExtVec) { SV0 = ExtVec; for (int i = 0; i != NumOpElts; ++i) Mask.push_back(i + ExtIdx); - } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) { + } else if (SV1.isUndef() || SV1 == ExtVec) { SV1 = ExtVec; for (int i = 0; i != NumOpElts; ++i) Mask.push_back(i + ExtIdx + NumElts); @@ -12844,7 +13043,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // Optimize concat_vectors where all but the first of the vectors are undef. 
if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { - return Op.getOpcode() == ISD::UNDEF; + return Op.isUndef(); })) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); @@ -12874,7 +13073,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDLoc dl = SDLoc(N); SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); - return DAG.getNode(ISD::BITCAST, dl, VT, Res); + return DAG.getBitcast(VT, Res); } } @@ -12885,9 +13084,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { auto IsBuildVectorOrUndef = [](const SDValue &Op) { return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); }; - bool AllBuildVectorsOrUndefs = - std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); - if (AllBuildVectorsOrUndefs) { + if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) { SmallVector<SDValue, 8> Opnds; EVT SVT = VT.getScalarType(); @@ -12926,7 +13123,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { assert(VT.getVectorNumElements() == Opnds.size() && "Concat vector type mismatch"); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); + return DAG.getBuildVector(VT, SDLoc(N), Opnds); } // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. @@ -12948,7 +13145,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Op = N->getOperand(i); - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) continue; // Check if this is the identity extract: @@ -13033,11 +13230,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // otherwise => (extract_subvec V1, ExtIdx) if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) - return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, - DAG.getNode(ISD::BITCAST, dl, - N->getOperand(0).getValueType(), - V->getOperand(0)), N->getOperand(1)); + return DAG.getBitcast(NVT, V->getOperand(1)); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NVT, + DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), + N->getOperand(1)); } } @@ -13148,7 +13345,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { // Special case: shuffle(concat(A,B)) can be more efficiently represented // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high // half vector elements. 
- if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && + if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() && std::all_of(SVN->getMask().begin() + NumElemsPerConcat, SVN->getMask().end(), [](int i) { return i == -1; })) { N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), @@ -13204,7 +13401,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); // Canonicalize shuffle undef, undef -> undef - if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + if (N0.isUndef() && N1.isUndef()) return DAG.getUNDEF(VT); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); @@ -13217,29 +13414,15 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (Idx >= (int)NumElts) Idx -= NumElts; NewMask.push_back(Idx); } - return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), - &NewMask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask); } // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. - if (N0.getOpcode() == ISD::UNDEF) { - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - if (Idx >= 0) { - if (Idx >= (int)NumElts) - Idx -= NumElts; - else - Idx = -1; // remove reference to lhs - } - NewMask.push_back(Idx); - } - return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT), - &NewMask[0]); - } + if (N0.isUndef()) + return DAG.getCommutedVectorShuffle(*SVN); // Remove references to rhs if it is undef - if (N1.getOpcode() == ISD::UNDEF) { + if (N1.isUndef()) { bool Changed = false; SmallVector<int, 8> NewMask; for (unsigned i = 0; i != NumElts; ++i) { @@ -13251,7 +13434,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { NewMask.push_back(Idx); } if (Changed) - return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); } // If it is a splat, check if the argument vector is another splat or a @@ -13275,7 +13458,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue Base; bool AllSame = true; for (unsigned i = 0; i != NumElts; ++i) { - if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + if (!V->getOperand(i).isUndef()) { Base = V->getOperand(i); break; } @@ -13296,13 +13479,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Canonicalize any other splat as a build_vector. const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); SmallVector<SDValue, 8> Ops(NumElts, Splatted); - SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - V->getValueType(0), Ops); + SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops); // We may have jumped through bitcasts, so the type of the // BUILD_VECTOR may not match the type of the shuffle. 
if (V->getValueType(0) != VT) - NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + NewBV = DAG.getBitcast(VT, NewBV); return NewBV; } } @@ -13315,12 +13497,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (N0.getOpcode() == ISD::CONCAT_VECTORS && Level < AfterLegalizeVectorOps && - (N1.getOpcode() == ISD::UNDEF || + (N1.isUndef() || (N1.getOpcode() == ISD::CONCAT_VECTORS && N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { - SDValue V = partitionShuffleOfConcats(N, DAG); - - if (V.getNode()) + if (SDValue V = partitionShuffleOfConcats(N, DAG)) return V; } @@ -13357,7 +13537,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { Op = TLI.isZExtFree(Op.getValueType(), SVT) ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); + return DAG.getBuildVector(VT, SDLoc(N), Ops); } } @@ -13365,7 +13545,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // attempt to merge the 2 shuffles and suitably bitcast the inputs/output // back to their original types. if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && - N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && + N1.isUndef() && Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { // Peek through the bitcast only if there is one user. @@ -13426,11 +13606,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } if (LegalMask) { - SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); - SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); - return DAG.getNode( - ISD::BITCAST, SDLoc(N), VT, - DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + SV0 = DAG.getBitcast(ScaleVT, SV0); + SV1 = DAG.getBitcast(ScaleVT, SV1); + return DAG.getBitcast( + VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); } } } @@ -13451,7 +13630,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue SV0 = N1->getOperand(0); SDValue SV1 = N1->getOperand(1); bool HasSameOp0 = N0 == SV0; - bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + bool IsSV1Undef = SV1.isUndef(); if (HasSameOp0 || IsSV1Undef || N0 == SV1) // Commute the operands of this shuffle so that next rule // will trigger. @@ -13504,7 +13683,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // Simple case where 'CurrentVec' is UNDEF. - if (CurrentVec.getOpcode() == ISD::UNDEF) { + if (CurrentVec.isUndef()) { Mask.push_back(-1); continue; } @@ -13559,7 +13738,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); } return SDValue(); @@ -13595,26 +13774,30 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + if (N0.getValueType() != N1.getValueType()) + return SDValue(); + // If the input vector is a concatenation, and the insert replaces // one of the halves, we can optimize into a single concat_vectors. 
- if (N0.getOpcode() == ISD::CONCAT_VECTORS && - N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { + if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 && + N2.getOpcode() == ISD::Constant) { APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); EVT VT = N->getValueType(0); // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors Z, Y) if (InsIdx == 0) - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, - N->getOperand(1), N0.getOperand(1)); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1, + N0.getOperand(1)); // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> // (concat_vectors X, Z) - if (InsIdx == VT.getVectorNumElements()/2) - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, - N0.getOperand(0), N->getOperand(1)); + if (InsIdx == VT.getVectorNumElements() / 2) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0), + N1); } return SDValue(); @@ -13684,7 +13867,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { int EltIdx = i / Split; int SubIdx = i % Split; SDValue Elt = RHS.getOperand(EltIdx); - if (Elt.getOpcode() == ISD::UNDEF) { + if (Elt.isUndef()) { Indices.push_back(-1); continue; } @@ -13724,7 +13907,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue Zero = DAG.getConstant(0, dl, ClearVT); return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, DAG.getBitcast(ClearVT, LHS), - Zero, &Indices[0])); + Zero, Indices)); }; // Determine maximum split level (byte level masking). @@ -13763,8 +13946,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // -> (shuffle (VBinOp (A, B)), Undef, Mask). if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && - LHS.getOperand(1).getOpcode() == ISD::UNDEF && - RHS.getOperand(1).getOpcode() == ISD::UNDEF) { + LHS.getOperand(1).isUndef() && + RHS.getOperand(1).isUndef()) { ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); @@ -13776,15 +13959,15 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { N->getFlags()); AddUsersToWorklist(N); return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, - &SVN0->getMask()[0]); + SVN0->getMask()); } } return SDValue(); } -SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, - SDValue N1, SDValue N2){ +SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2) { assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, @@ -13819,33 +14002,33 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { - // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) - // The select + setcc is redundant, because fsqrt returns NaN for X < -0. + // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) + // The select + setcc is redundant, because fsqrt returns NaN for X < 0. 
if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) SDValue Sqrt = RHS; ISD::CondCode CC; SDValue CmpLHS; - const ConstantFPSDNode *NegZero = nullptr; + const ConstantFPSDNode *Zero = nullptr; if (TheSelect->getOpcode() == ISD::SELECT_CC) { CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); CmpLHS = TheSelect->getOperand(0); - NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1)); + Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); } else { // SELECT or VSELECT SDValue Cmp = TheSelect->getOperand(0); if (Cmp.getOpcode() == ISD::SETCC) { CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); CmpLHS = Cmp.getOperand(0); - NegZero = isConstOrConstSplatFP(Cmp.getOperand(1)); + Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); } } - if (NegZero && NegZero->isNegative() && NegZero->isZero() && + if (Zero && Zero->isZero() && Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || CC == ISD::SETULT || CC == ISD::SETLT)) { - // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) CombineTo(TheSelect, Sqrt); return true; } @@ -13932,24 +14115,22 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // It is safe to replace the two loads if they have different alignments, // but the new load must be the minimum (most restrictive) alignment of the // inputs. - bool isInvariant = LLD->isInvariant() & RLD->isInvariant(); unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); + MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); + if (!RLD->isInvariant()) + MMOFlags &= ~MachineMemOperand::MOInvariant; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { - Load = DAG.getLoad(TheSelect->getValueType(0), - SDLoc(TheSelect), - // FIXME: Discards pointer and AA info. - LLD->getChain(), Addr, MachinePointerInfo(), - LLD->isVolatile(), LLD->isNonTemporal(), - isInvariant, Alignment); + // FIXME: Discards pointer and AA info. + Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), + LLD->getChain(), Addr, MachinePointerInfo(), Alignment, + MMOFlags); } else { - Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? - RLD->getExtensionType() : LLD->getExtensionType(), - SDLoc(TheSelect), - TheSelect->getValueType(0), - // FIXME: Discards pointer and AA info. - LLD->getChain(), Addr, MachinePointerInfo(), - LLD->getMemoryVT(), LLD->isVolatile(), - LLD->isNonTemporal(), isInvariant, Alignment); + // FIXME: Discards pointer and AA info. + Load = DAG.getExtLoad( + LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() + : LLD->getExtensionType(), + SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr, + MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags); } // Users of the select now use the result of the load. @@ -13967,9 +14148,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. -SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, - SDValue N2, SDValue N3, - ISD::CondCode CC, bool NotExtCompare) { +SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, ISD::CondCode CC, + bool NotExtCompare) { // (x ? y : y) -> y. 
if (N2 == N3) return N2; @@ -14057,7 +14238,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return DAG.getLoad( TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - false, false, false, Alignment); + Alignment); } } @@ -14116,7 +14297,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. - APInt AndMask = ConstAndRHS->getAPIntValue(); + const APInt &AndMask = ConstAndRHS->getAPIntValue(); SDValue ShlAmt = DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), getShiftAmountTy(AndLHS.getValueType())); @@ -14210,13 +14391,48 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } } + // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) + // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) + // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) + // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X) + // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X) + // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) + // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) + // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) + if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + SDValue ValueOnZero = N2; + SDValue Count = N3; + // If the condition is NE instead of EQ, swap the operands. + if (CC == ISD::SETNE) + std::swap(ValueOnZero, Count); + // Check if the value on zero is a constant equal to the bits in the type. + if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) { + if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) { + // If the other operand is cttz/cttz_zero_undef of N0, and cttz is + // legal, combine to just cttz. + if ((Count.getOpcode() == ISD::CTTZ || + Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && + N0 == Count.getOperand(0) && + (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT))) + return DAG.getNode(ISD::CTTZ, DL, VT, N0); + // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is + // legal, combine to just ctlz. + if ((Count.getOpcode() == ISD::CTLZ || + Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) && + N0 == Count.getOperand(0) && + (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT))) + return DAG.getNode(ISD::CTLZ, DL, VT, N0); + } + } + } + return SDValue(); } /// This is a stub for TargetLowering::SimplifySetCC. -SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, - SDValue N1, ISD::CondCode Cond, - SDLoc DL, bool foldBooleans) { +SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &DL, + bool foldBooleans) { TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); @@ -14227,6 +14443,11 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, /// by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildSDIV(SDNode *N) { + // When optimizing for minimum size, we don't want to expand a div to a mul + // and a shift.
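+ // (The magic-number expansion below rewrites the divide as a fixed-point
+ // multiply by an approximate reciprocal plus shift/add fixups, trading
+ // code size for speed.)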
+ if (DAG.getMachineFunction().getFunction()->optForMinSize()) + return SDValue(); + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) return SDValue(); @@ -14268,6 +14489,11 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { /// number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildUDIV(SDNode *N) { + // When optimizing for minimum size, we don't want to expand a div to a mul + // and a shift. + if (DAG.getMachineFunction().getFunction()->optForMinSize()) + return SDValue(); + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) return SDValue(); @@ -14334,9 +14560,9 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { /// => /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. -SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags *Flags) { +SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, + unsigned Iterations, + SDNodeFlags *Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); @@ -14363,6 +14589,13 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } + + // If non-reciprocal square root is requested, multiply the result by Arg. + if (!Reciprocal) { + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + AddToWorklist(Est.getNode()); + } + return Est; } @@ -14371,35 +14604,55 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) -SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations, - SDNodeFlags *Flags) { +SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, + unsigned Iterations, + SDNodeFlags *Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); - // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) - for (unsigned i = 0; i < Iterations; ++i) { - SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); - AddToWorklist(HalfEst.getNode()); - - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - AddToWorklist(Est.getNode()); + // This routine must enter the loop below to work correctly + // when (Reciprocal == false).
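+ // (For the non-reciprocal case, the multiply by Arg that turns rsqrt(Arg)
+ // into sqrt(Arg) is folded into the final iteration, so zero iterations
+ // would return an unconverted reciprocal estimate. A scalar model of this
+ // recurrence follows this file's diff.)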
+ assert(Iterations > 0); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); - AddToWorklist(Est.getNode()); - - Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags); - AddToWorklist(Est.getNode()); + // Newton iterations for reciprocal square root: + // E = (E * -0.5) * ((A * E) * E + -3.0) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); + AddToWorklist(AE.getNode()); + + SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); + AddToWorklist(AEE.getNode()); + + SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); + AddToWorklist(RHS.getNode()); + + // When calculating a square root at the last iteration build: + // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) + // (notice a common subexpression) + SDValue LHS; + if (Reciprocal || (i + 1) < Iterations) { + // RSQRT: LHS = (E * -0.5) + LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + } else { + // SQRT: LHS = (A * E) * -0.5 + LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); + } + AddToWorklist(LHS.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags); + Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); AddToWorklist(Est.getNode()); } + return Est; } -SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { +/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case +/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if +/// Op can be zero. +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, + bool Reciprocal) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -14410,9 +14663,9 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { AddToWorklist(Est.getNode()); if (Iterations) { - Est = UseOneConstNR ? - BuildRsqrtNROneConst(Op, Est, Iterations, Flags) : - BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags); + Est = UseOneConstNR + ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) + : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); } return Est; } @@ -14420,6 +14673,30 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { return SDValue(); } +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { + return buildSqrtEstimateImpl(Op, Flags, true); +} + +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { + SDValue Est = buildSqrtEstimateImpl(Op, Flags, false); + if (!Est) + return SDValue(); + + // Unfortunately, Est is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + EVT VT = Est.getValueType(); + SDLoc DL(Op); + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = getSetCCResultType(VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + + Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp, + Zero, Est); + AddToWorklist(Est.getNode()); + return Est; +} + /// Return true if base is a frame index, which is known not to alias with /// anything but itself. Provides base object and offset as results. 
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, @@ -14514,7 +14791,7 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && (Op0->getMemoryVT().getSizeInBits() >> 3 == Op1->getMemoryVT().getSizeInBits() >> 3) && - (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) { + (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) { int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); @@ -14634,63 +14911,6 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, break; } } - - // We need to be careful here to also search for aliases through the - // value operand of a store, etc. Consider the following situation: - // Token1 = ... - // L1 = load Token1, %52 - // S1 = store Token1, L1, %51 - // L2 = load Token1, %52+8 - // S2 = store Token1, L2, %51+8 - // Token2 = Token(S1, S2) - // L3 = load Token2, %53 - // S3 = store Token2, L3, %52 - // L4 = load Token2, %53+8 - // S4 = store Token2, L4, %52+8 - // If we search for aliases of S3 (which loads address %52), and we look - // only through the chain, then we'll miss the trivial dependence on L1 - // (which also loads from %52). We then might change all loads and - // stores to use Token1 as their chain operand, which could result in - // copying %53 into %52 before copying %52 into %51 (which should - // happen first). - // - // The problem is, however, that searching for such data dependencies - // can become expensive, and the cost is not directly related to the - // chain depth. Instead, we'll rule out such configurations here by - // insisting that we've visited all chain users (except for users - // of the original chain, which is not necessary). When doing this, - // we need to look through nodes we don't care about (otherwise, things - // like register copies will interfere with trivial cases). - - SmallVector<const SDNode *, 16> Worklist; - for (const SDNode *N : Visited) - if (N != OriginalChain.getNode()) - Worklist.push_back(N); - - while (!Worklist.empty()) { - const SDNode *M = Worklist.pop_back_val(); - - // We have already visited M, and want to make sure we've visited any uses - // of M that we care about. For uses that we've not visisted, and don't - // care about, queue them to the worklist. - - for (SDNode::use_iterator UI = M->use_begin(), - UIE = M->use_end(); UI != UIE; ++UI) - if (UI.getUse().getValueType() == MVT::Other && - Visited.insert(*UI).second) { - if (isa<MemSDNode>(*UI)) { - // We've not visited this use, and we care about it (it could have an - // ordering dependency with the original node). - Aliases.clear(); - Aliases.push_back(OriginalChain); - return; - } - - // We've not visited this use, but we don't care about it. Mark it as - // visited and enqueue it to the worklist. - Worklist.push_back(*UI); - } - } } /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain @@ -14713,17 +14933,17 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } -bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { +bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. 
- BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); // We must have a base and an offset. if (!BasePtr.Base.getNode()) return false; // Do not handle stores to undef base pointers. - if (BasePtr.Base.getOpcode() == ISD::UNDEF) + if (BasePtr.Base.isUndef()) return false; SmallVector<StoreSDNode *, 8> ChainedStores; @@ -14742,7 +14962,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); // Check that the base pointer is the same as the original one. if (!Ptr.equalBaseIndex(BasePtr)) @@ -14756,6 +14976,10 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { while (true) { if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { // We found a store node. Use it for the next iteration. + if (STn->isVolatile() || STn->isIndexed()) { + Index = nullptr; + break; + } ChainedStores.push_back(STn); Index = STn; break; @@ -14769,7 +14993,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { } } - bool MadeChange = false; + bool MadeChangeToSt = false; SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; for (StoreSDNode *ChainedStore : ChainedStores) { @@ -14777,7 +15001,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { SDValue BetterChain = FindBetterChain(ChainedStore, Chain); if (Chain != BetterChain) { - MadeChange = true; + if (ChainedStore == St) + MadeChangeToSt = true; BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); } } @@ -14787,7 +15012,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { for (auto Replacement : BetterChains) replaceStoreChain(Replacement.first, Replacement.second); - return MadeChange; + return MadeChangeToSt; } /// This is the entry point for the file. 
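The two-constant Newton-Raphson refinement that buildSqrtNRTwoConst emits as DAG nodes earlier in this file is easier to follow on scalars. Below is a minimal C++ sketch of the same recurrence; the function name and the starting value are illustrative only (targets supply a real initial estimate via getRsqrtEstimate):

#include <cmath>
#include <cstdio>

// Scalar model of buildSqrtNRTwoConst: E' = (E * -0.5) * ((A * E) * E + -3.0).
// For a non-reciprocal square root, the last iteration reuses A * E as the
// left factor, which multiplies the converged rsqrt(A) by A to yield sqrt(A).
static float refineSqrt(float A, float Est, unsigned Iterations,
                        bool Reciprocal) {
  for (unsigned i = 0; i < Iterations; ++i) {
    float AE = A * Est;            // (A * E), the common subexpression
    float RHS = AE * Est + -3.0f;  // (A * E) * E + -3.0
    float LHS = (Reciprocal || i + 1 < Iterations)
                    ? Est * -0.5f  // RSQRT: E * -0.5
                    : AE * -0.5f;  // SQRT, last step: (A * E) * -0.5
    Est = LHS * RHS;
  }
  return Est;
}

int main() {
  float A = 2.0f, Est = 0.7f; // 0.7 stands in for a hardware rsqrt estimate
  std::printf("rsqrt: %f vs %f\n", refineSqrt(A, Est, 2, true),
              1.0f / std::sqrt(A));
  std::printf("sqrt:  %f vs %f\n", refineSqrt(A, Est, 2, false), std::sqrt(A));
}

Two iterations from a one-digit estimate already agree with 1/sqrt(2) to about six decimal places, which is why only one or two refinement steps are usually requested.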
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index cfbb209..b10da00 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,7 +39,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Analysis.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -56,6 +55,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -88,6 +88,8 @@ void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS, IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + IsSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf); + IsSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError); Alignment = CS->getParamAlignment(AttrIdx); } @@ -351,7 +353,8 @@ void FastISel::recomputeInsertPt() { void FastISel::removeDeadCode(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { - assert(I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + assert(static_cast<MachineInstr *>(I) && static_cast<MachineInstr *>(E) && + std::distance(I, E) > 0 && "Invalid iterator!"); while (I != E) { MachineInstr *Dead = &*I; ++I; @@ -372,7 +375,7 @@ FastISel::SavePoint FastISel::enterLocalValueArea() { void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) - LastLocalValue = std::prev(FuncInfo.InsertPt); + LastLocalValue = &*std::prev(FuncInfo.InsertPt); // Restore the previous insert position. FuncInfo.InsertPt = OldInsertPt.InsertPt; @@ -492,13 +495,11 @@ bool FastISel::selectGetElementPtr(const User *I) { uint64_t TotalOffs = 0; // FIXME: What's a good SWAG number for MaxOffs? uint64_t MaxOffs = 2048; - Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(DL); - for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1, - E = I->op_end(); - OI != E; ++OI) { - const Value *Idx = *OI; - if (auto *StTy = dyn_cast<StructType>(Ty)) { + for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); + GTI != E; ++GTI) { + const Value *Idx = GTI.getOperand(); + if (auto *StTy = dyn_cast<StructType>(*GTI)) { uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset @@ -511,9 +512,8 @@ bool FastISel::selectGetElementPtr(const User *I) { TotalOffs = 0; } } - Ty = StTy->getElementType(Field); } else { - Ty = cast<SequentialType>(Ty)->getElementType(); + Type *Ty = GTI.getIndexedType(); // If this is a constant subscript, handle it quickly. 
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { @@ -880,9 +880,8 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol, unsigned NumArgs) { ImmutableCallSite CS(CI); - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - Type *RetTy = FTy->getReturnType(); + FunctionType *FTy = CS.getFunctionType(); + Type *RetTy = CS.getType(); ArgListTy Args; Args.reserve(NumArgs); @@ -960,6 +959,10 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { Flags.setInReg(); if (Arg.IsSRet) Flags.setSRet(); + if (Arg.IsSwiftSelf) + Flags.setSwiftSelf(); + if (Arg.IsSwiftError) + Flags.setSwiftError(); if (Arg.IsByVal) Flags.setByVal(); if (Arg.IsInAlloca) { @@ -1010,9 +1013,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { bool FastISel::lowerCall(const CallInst *CI) { ImmutableCallSite CS(CI); - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FuncTy = cast<FunctionType>(PT->getElementType()); - Type *RetTy = FuncTy->getReturnType(); + FunctionType *FuncTy = CS.getFunctionType(); + Type *RetTy = CS.getType(); ArgListTy Args; ArgListEntry Entry; @@ -1322,6 +1324,15 @@ bool FastISel::selectBitCast(const User *I) { return true; } +// Return true if we should copy from swift error to the final vreg as specified +// by SwiftErrorWorklist. +static bool shouldCopySwiftErrorsToFinalVRegs(const TargetLowering &TLI, + FunctionLoweringInfo &FuncInfo) { + if (!TLI.supportSwiftError()) + return false; + return FuncInfo.SwiftErrorWorklist.count(FuncInfo.MBB); +} + // Remove local value instructions starting from the instruction after // SavedLastLocalValue to the current function insert point. void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) @@ -1345,7 +1356,11 @@ bool FastISel::selectInstruction(const Instruction *I) { MachineInstr *SavedLastLocalValue = getLastLocalValue(); // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. - if (isa<TerminatorInst>(I)) + if (isa<TerminatorInst>(I)) { + // If we need to materialize any vreg from worklist, we bail out of + // FastISel. + if (shouldCopySwiftErrorsToFinalVRegs(TLI, FuncInfo)) + return false; if (!handlePHINodesInSuccessorBlocks(I->getParent())) { // PHI node handling may have generated local value instructions, // even though it failed to handle all PHI nodes. @@ -1354,6 +1369,13 @@ bool FastISel::selectInstruction(const Instruction *I) { removeDeadLocalValueCode(SavedLastLocalValue); return false; } + } + + // FastISel does not handle any operand bundles except OB_funclet. + if (ImmutableCallSite CS = ImmutableCallSite(I)) + for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) + if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet) + return false; DbgLoc = I->getDebugLoc(); @@ -1413,7 +1435,8 @@ bool FastISel::selectInstruction(const Instruction *I) { /// Emit an unconditional branch to the given block, unless it is the immediate /// (fall-through) successor, and update the CFG. 
-void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { +void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, + const DebugLoc &DbgLoc) { if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction @@ -2053,7 +2076,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } - FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg)); DbgLoc = DebugLoc(); } } @@ -2138,7 +2161,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { const Value *Ptr; Type *ValTy; unsigned Alignment; - unsigned Flags; + MachineMemOperand::Flags Flags; bool IsVolatile; if (const auto *LI = dyn_cast<LoadInst>(I)) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 08815ed..e669ffc 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -88,6 +88,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, RegInfo = &MF->getRegInfo(); MachineModuleInfo &MMI = MF->getMMI(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + unsigned StackAlign = TFI->getStackAlignment(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; @@ -96,6 +97,31 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, Fn->isVarArg(), Outs, Fn->getContext()); + // If this personality uses funclets, we need to do a bit more work. + DenseMap<const AllocaInst *, int *> CatchObjects; + EHPersonality Personality = classifyEHPersonality( + Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr); + if (isFuncletEHPersonality(Personality)) { + // Calculate state numbers if we haven't already. + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + if (Personality == EHPersonality::MSVC_CXX) + calculateWinCXXEHStateNumbers(&fn, EHInfo); + else if (isAsynchronousEHPersonality(Personality)) + calculateSEHStateNumbers(&fn, EHInfo); + else if (Personality == EHPersonality::CoreCLR) + calculateClrEHStateNumbers(&fn, EHInfo); + + // Map all BB references in the WinEH data to MBBs. + for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (const AllocaInst *AI = H.CatchObj.Alloca) + CatchObjects.insert({AI, &H.CatchObj.FrameIndex}); + else + H.CatchObj.FrameIndex = INT_MAX; + } + } + } + // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. @@ -108,7 +134,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, unsigned Align = std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), AI->getAlignment()); - unsigned StackAlign = TFI->getStackAlignment(); // Static allocas can be folded into the initial stack frame // adjustment. For targets that don't realign the stack, don't @@ -120,9 +145,21 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. 
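+ // Some targets require WinEH catch objects to live at a fixed frame
+ // offset (TLI->needsFixedCatchObjects()); give those a fixed, aliased
+ // object and everything else an ordinary static stack object.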
+ int FrameIndex = INT_MAX; + auto Iter = CatchObjects.find(AI); + if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) { + FrameIndex = MF->getFrameInfo()->CreateFixedObject( + TySize, 0, /*Immutable=*/false, /*isAliased=*/true); + MF->getFrameInfo()->setObjectAlignment(FrameIndex, Align); + } else { + FrameIndex = + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); + } - StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); + StaticAllocaMap[AI] = FrameIndex; + // Update the catch handler information. + if (Iter != CatchObjects.end()) + *Iter->second = FrameIndex; } else { // FIXME: Overaligned static allocas should be grouped into // a single dynamic allocation instead of using a separate @@ -281,31 +318,14 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, LPads.push_back(LPI); } - // If this personality uses funclets, we need to do a bit more work. - if (!Fn->hasPersonalityFn()) - return; - EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); if (!isFuncletEHPersonality(Personality)) return; - // Calculate state numbers if we haven't already. WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); - if (Personality == EHPersonality::MSVC_CXX) - calculateWinCXXEHStateNumbers(&fn, EHInfo); - else if (isAsynchronousEHPersonality(Personality)) - calculateSEHStateNumbers(&fn, EHInfo); - else if (Personality == EHPersonality::CoreCLR) - calculateClrEHStateNumbers(&fn, EHInfo); // Map all BB references in the WinEH data to MBBs. for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { for (WinEHHandlerType &H : TBME.HandlerArray) { - if (H.CatchObj.Alloca) { - assert(StaticAllocaMap.count(H.CatchObj.Alloca)); - H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca]; - } else { - H.CatchObj.FrameIndex = INT_MAX; - } if (H.Handler) H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; } @@ -336,7 +356,7 @@ void FunctionLoweringInfo::clear() { ByValArgFrameIndexMap.clear(); RegFixups.clear(); StatepointStackSlots.clear(); - StatepointRelocatedValues.clear(); + StatepointSpillMaps.clear(); PreferredExtendType.clear(); } @@ -575,3 +595,21 @@ void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, } } } + +unsigned FunctionLoweringInfo::findSwiftErrorVReg(const MachineBasicBlock *MBB, + const Value* Val) const { + // Find the index in SwiftErrorVals. + SwiftErrorValues::const_iterator I = + std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val); + assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals"); + return SwiftErrorMap.lookup(MBB)[I - SwiftErrorVals.begin()]; +} + +void FunctionLoweringInfo::setSwiftErrorVReg(const MachineBasicBlock *MBB, + const Value* Val, unsigned VReg) { + // Find the index in SwiftErrorVals. 
+ SwiftErrorValues::iterator I = + std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val); + assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals"); + SwiftErrorMap[MBB][I - SwiftErrorVals.begin()] = VReg; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a1e2d41..c8af73a 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -319,7 +320,6 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, "Chain and glue operands should occur at end of operand list!"); // Get/emit the operand. unsigned VReg = getVR(Op, VRBaseMap); - assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); const MCInstrDesc &MCID = MIB->getDesc(); bool isOptDef = IIOpNum < MCID.getNumOperands() && @@ -333,6 +333,8 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, const TargetRegisterClass *DstRC = nullptr; if (IIOpNum < II->getNumOperands()) DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); + assert((!DstRC || TargetRegisterInfo::isVirtualRegister(VReg)) && + "Expected VReg"); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), @@ -440,7 +442,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, - MVT VT, DebugLoc DL) { + MVT VT, const DebugLoc &DL) { const TargetRegisterClass *VRC = MRI->getRegClass(VReg); const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); @@ -873,7 +875,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Run post-isel target hook to adjust this instruction if needed. if (II.hasPostISelHook()) - TLI->AdjustInstrPostInstrSelection(MIB, Node); + TLI->AdjustInstrPostInstrSelection(*MIB, Node); } /// EmitSpecialNode - Generate machine code for a target-independent node and diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index 3b24d93..8a8a1bb 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -83,8 +83,8 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// ConstrainForSubReg - Try to constrain VReg to a register class that /// supports SubIdx sub-registers. Emit a copy if that isn't possible. /// Return the virtual register to use. - unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, - MVT VT, DebugLoc DL); + unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT, + const DebugLoc &DL); /// EmitSubregNode - Generate machine code for subreg nodes. /// @@ -132,7 +132,7 @@ public: /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. 
InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos); - + private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, DenseMap<SDValue, unsigned> &VRBaseMap); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f783634..18ad910 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -93,25 +93,25 @@ private: /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. - SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, - SDValue Idx, SDLoc dl); - SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, - SDValue Idx, SDLoc dl); + SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, + const SDLoc &dl); + SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, + const SDLoc &dl); /// Return a vector shuffle operation which /// performs the same shuffle in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g.
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> - SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, + SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef<int> Mask) const; bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - bool &NeedInvert, SDLoc dl); + bool &NeedInvert, const SDLoc &dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, - unsigned NumOps, bool isSigned, SDLoc dl); + unsigned NumOps, bool isSigned, const SDLoc &dl); std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); @@ -128,26 +128,28 @@ private: void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); - SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, SDLoc dl); + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, + const SDLoc &dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl<SDValue> &Results); - void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const; - SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL, + void getSignAsIntValue(FloatSignAsInt &State, const SDLoc &DL, + SDValue Value) const; + SDValue modifySignAsInt(const FloatSignAsInt &State, const SDLoc &DL, SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, - SDLoc dl); + const SDLoc &dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, - SDLoc dl); + const SDLoc &dl); SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, - SDLoc dl); + const SDLoc &dl); - SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl); - SDValue ExpandBSWAP(SDValue Op, SDLoc dl); - SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl); + SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); + SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); + SDValue ExpandBitCount(unsigned Opc, SDValue Op, const SDLoc &dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandInsertToVectorThroughStack(SDValue Op); @@ -176,8 +178,6 @@ public: "Replacing one node with another that produces a different number " "of values!"); DAG.ReplaceAllUsesWith(Old, New); - for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i)); if (UpdatedNodes) UpdatedNodes->insert(New); ReplacedNode(Old); @@ -187,7 +187,6 @@ public: dbgs() << " with: "; New->dump(&DAG)); DAG.ReplaceAllUsesWith(Old, New); - DAG.TransferDbgValues(Old, New); if (UpdatedNodes) UpdatedNodes->insert(New.getNode()); ReplacedNode(Old.getNode()); @@ -200,7 +199,6 @@ public: DEBUG(dbgs() << (i == 0 ? " with: " : " and: "); New[i]->dump(&DAG)); - DAG.TransferDbgValues(SDValue(Old, i), New[i]); if (UpdatedNodes) UpdatedNodes->insert(New[i].getNode()); } @@ -213,10 +211,9 @@ public: /// performs the same shuffle in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g.
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> -SDValue -SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, - SDValue N1, SDValue N2, - ArrayRef<int> Mask) const { +SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType( + EVT NVT, EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, + ArrayRef<int> Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; @@ -224,7 +221,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); if (NumEltsGrowth == 1) - return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]); + return DAG.getVectorShuffle(NVT, dl, N1, N2, Mask); SmallVector<int, 8> NewMask; for (unsigned i = 0; i != NumMaskElts; ++i) { @@ -238,7 +235,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, } assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?"); - return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); + return DAG.getVectorShuffle(NVT, dl, N1, N2, NewMask); } /// Expands the ConstantFP node to an integer constant or @@ -285,13 +282,12 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { SDValue Result = DAG.getExtLoad( ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT, - false, false, false, Alignment); + Alignment); return Result; } - SDValue Result = - DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - false, false, false, Alignment); + SDValue Result = DAG.getLoad( + OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); return Result; } @@ -302,301 +298,20 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); - SDValue Result = - DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), - false, false, false, Alignment); + SDValue Result = DAG.getLoad( + VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); return Result; } -/// Expands an unaligned store to 2 half-size stores. -static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI, - SelectionDAGLegalize *DAGLegalize) { - assert(ST->getAddressingMode() == ISD::UNINDEXED && - "unaligned indexed stores not implemented!"); - SDValue Chain = ST->getChain(); - SDValue Ptr = ST->getBasePtr(); - SDValue Val = ST->getValue(); - EVT VT = Val.getValueType(); - int Alignment = ST->getAlignment(); - unsigned AS = ST->getAddressSpace(); - - SDLoc dl(ST); - if (ST->getMemoryVT().isFloatingPoint() || - ST->getMemoryVT().isVector()) { - EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - if (TLI.isTypeLegal(intVT)) { - // Expand to a bitconvert of the value to the integer type of the - // same size, then a (misaligned) int store. - // FIXME: Does not handle truncating floating point stores! 
- SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); - DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); - return; - } - // Do a (aligned) store to a stack slot, then copy from the stack slot - // to the final destination using (unaligned) integer loads and stores. - EVT StoredVT = ST->getMemoryVT(); - MVT RegVT = - TLI.getRegisterType(*DAG.getContext(), - EVT::getIntegerVT(*DAG.getContext(), - StoredVT.getSizeInBits())); - unsigned StoredBytes = StoredVT.getSizeInBits() / 8; - unsigned RegBytes = RegVT.getSizeInBits() / 8; - unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; - - // Make sure the stack slot is also aligned for the register type. - SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); - - // Perform the original store, only redirected to the stack slot. - SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, MachinePointerInfo(), - StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant( - RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout(), AS)); - SmallVector<SDValue, 8> Stores; - unsigned Offset = 0; - - // Do all but one copies using the full register width. - for (unsigned i = 1; i < NumRegs; i++) { - // Load one integer register's worth from the stack slot. - SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, - MachinePointerInfo(), - false, false, false, 0); - // Store it to the final location. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, - ST->getPointerInfo().getWithOffset(Offset), - ST->isVolatile(), ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset))); - // Increment the pointers. - Offset += RegBytes; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - Increment); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - } - - // The last store may be partial. Do a truncating store. On big-endian - // machines this requires an extending load from the stack slot to ensure - // that the bits are in the right place. - EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), - 8 * (StoredBytes - Offset)); - - // Load from the stack slot. - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - MachinePointerInfo(), - MemVT, false, false, false, 0); - - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, - ST->getPointerInfo() - .getWithOffset(Offset), - MemVT, ST->isVolatile(), - ST->isNonTemporal(), - MinAlign(ST->getAlignment(), Offset), - ST->getAAInfo())); - // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); - DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); - return; - } - assert(ST->getMemoryVT().isInteger() && - !ST->getMemoryVT().isVector() && - "Unaligned store of unknown type."); - // Get the half-size VT - EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext()); - int NumBits = NewStoredVT.getSizeInBits(); - int IncrementSize = NumBits / 8; - - // Divide the stored value in two parts. - SDValue ShiftAmount = - DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Val.getValueType(), - DAG.getDataLayout())); - SDValue Lo = Val; - SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); - - // Store the two parts - SDValue Store1, Store2; - Store1 = DAG.getTruncStore(Chain, dl, - DAG.getDataLayout().isLittleEndian() ? 
Lo : Hi, - Ptr, ST->getPointerInfo(), NewStoredVT, - ST->isVolatile(), ST->isNonTemporal(), Alignment); - - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - TLI.getPointerTy(DAG.getDataLayout(), AS))); - Alignment = MinAlign(Alignment, IncrementSize); - Store2 = DAG.getTruncStore( - Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, - ST->isVolatile(), ST->isNonTemporal(), Alignment, ST->getAAInfo()); - - SDValue Result = - DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); - DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); -} - -/// Expands an unaligned load to 2 half-size loads. -static void -ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI, - SDValue &ValResult, SDValue &ChainResult) { - assert(LD->getAddressingMode() == ISD::UNINDEXED && - "unaligned indexed loads not implemented!"); - SDValue Chain = LD->getChain(); - SDValue Ptr = LD->getBasePtr(); - EVT VT = LD->getValueType(0); - EVT LoadedVT = LD->getMemoryVT(); - SDLoc dl(LD); - if (VT.isFloatingPoint() || VT.isVector()) { - EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); - if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { - // Expand to a (misaligned) integer load of the same size, - // then bitconvert to floating point or vector. - SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, - LD->getMemOperand()); - SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); - if (LoadedVT != VT) - Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : - ISD::ANY_EXTEND, dl, VT, Result); - - ValResult = Result; - ChainResult = newLoad.getValue(1); - return; - } - - // Copy the value to a (aligned) stack slot using (unaligned) integer - // loads and stores, then do a (aligned) load from the stack slot. - MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); - unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; - unsigned RegBytes = RegVT.getSizeInBits() / 8; - unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; - - // Make sure the stack slot is also aligned for the register type. - SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - - SDValue Increment = - DAG.getConstant(RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout())); - SmallVector<SDValue, 8> Stores; - SDValue StackPtr = StackBase; - unsigned Offset = 0; - - // Do all but one copies using the full register width. - for (unsigned i = 1; i < NumRegs; i++) { - // Load one integer register's worth from the original location. - SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, - LD->getPointerInfo().getWithOffset(Offset), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset), - LD->getAAInfo()); - // Follow the load with a store to the stack slot. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo(), false, false, 0)); - // Increment the pointers. - Offset += RegBytes; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - Increment); - } - - // The last copy may be partial. Do an extending load. 
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), - 8 * (LoadedBytes - Offset)); - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, - LD->getPointerInfo().getWithOffset(Offset), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), - LD->isInvariant(), - MinAlign(LD->getAlignment(), Offset), - LD->getAAInfo()); - // Follow the load with a store to the stack slot. Remember the store. - // On big-endian machines this requires a truncating store to ensure - // that the bits end up in the right place. - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo(), MemVT, - false, false, 0)); - - // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); - - // Finally, perform the original load only redirected to the stack slot. - Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - MachinePointerInfo(), LoadedVT, false,false, false, - 0); - - // Callers expect a MERGE_VALUES node. - ValResult = Load; - ChainResult = TF; - return; - } - assert(LoadedVT.isInteger() && !LoadedVT.isVector() && - "Unaligned load of unsupported type."); - - // Compute the new VT that is half the size of the old one. This is an - // integer MVT. - unsigned NumBits = LoadedVT.getSizeInBits(); - EVT NewLoadedVT; - NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); - NumBits >>= 1; - - unsigned Alignment = LD->getAlignment(); - unsigned IncrementSize = NumBits / 8; - ISD::LoadExtType HiExtType = LD->getExtensionType(); - - // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. - if (HiExtType == ISD::NON_EXTLOAD) - HiExtType = ISD::ZEXTLOAD; - - // Load the value in two parts - SDValue Lo, Hi; - if (DAG.getDataLayout().isLittleEndian()) { - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), - NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), Alignment, - LD->getAAInfo()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, - LD->getPointerInfo().getWithOffset(IncrementSize), - NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(),LD->isInvariant(), - MinAlign(Alignment, IncrementSize), LD->getAAInfo()); - } else { - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), - NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), Alignment, - LD->getAAInfo()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, - LD->getPointerInfo().getWithOffset(IncrementSize), - NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), - MinAlign(Alignment, IncrementSize), LD->getAAInfo()); - } - - // aggregate the two parts - SDValue ShiftAmount = - DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Hi.getValueType(), - DAG.getDataLayout())); - SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); - Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); - - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - ValResult = Result; - ChainResult = TF; -} - /// Some targets cannot handle a variable insertion index for the /// INSERT_VECTOR_ELT instruction.
In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. -SDValue SelectionDAGLegalize:: -PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, - SDLoc dl) { +SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, + SDValue Val, + SDValue Idx, + const SDLoc &dl) { SDValue Tmp1 = Vec; SDValue Tmp2 = Val; SDValue Tmp3 = Idx; @@ -618,8 +333,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // Store the vector. SDValue Ch = DAG.getStore( DAG.getEntryNode(), dl, Tmp1, StackPtr, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, - false, 0); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); // Truncate or zero extend offset to target pointer type. Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); @@ -629,17 +343,15 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, DAG.getConstant(EltSize, dl, IdxVT)); SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. - Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, - false, false, 0); + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), SPFI), - false, false, false, 0); + DAG.getMachineFunction(), SPFI)); } - -SDValue SelectionDAGLegalize:: -ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl) { +SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, + SDValue Idx, + const SDLoc &dl) { if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { // SCALAR_TO_VECTOR requires that the type of the value being inserted // match the element type of the vector being created, except for @@ -658,8 +370,7 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl) { for (unsigned i = 0; i != NumElts; ++i) ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); - return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, - &ShufOps[0]); + return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, ShufOps); } } return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); @@ -676,8 +387,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { @@ -686,8 +396,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), SDLoc(CFP), MVT::i32); - return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, AAInfo); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment, + MMOFlags, AAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -696,7 +406,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, AAInfo); + Alignment, MMOFlags, AAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -709,14 +419,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment, AAInfo); + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, + MMOFlags, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, dl, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, MinAlign(Alignment, 4U), - AAInfo); + MinAlign(Alignment, 4U), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -732,8 +441,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDLoc dl(Node); unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); if (!ST->isTruncatingStore()) { @@ -754,8 +462,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) - ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } break; } case TargetLowering::Custom: { @@ -770,9 +480,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { "Can only promote stores to same size type"); Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment, AAInfo); + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -794,8 +503,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { StVT.getStoreSizeInBits()); Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = - DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment, AAInfo); + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, + Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -815,9 +524,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - RoundVT, - isVolatile, isNonTemporal, Alignment, - AAInfo); + RoundVT, Alignment, MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; @@ -828,10 +535,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), AAInfo); + Hi = DAG.getTruncStore( + Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } else { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -841,18 +548,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, isVolatile, isNonTemporal, Alignment, - AAInfo); + RoundVT, Alignment, MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), AAInfo); + Lo = DAG.getTruncStore( + Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } // The order of the stores doesn't matter. @@ -867,8 +573,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) - ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } break; } case TargetLowering::Custom: { @@ -886,8 +594,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { "Do not know how to expand this store!"); Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, AAInfo); + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -917,13 +625,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) - ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); + } break; } case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(RVal, DAG); - if (Res.getNode()) { + if (SDValue Res = TLI.LowerOperation(RVal, DAG)) { RVal = Res; RChain = Res.getValue(1); } @@ -956,9 +664,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); unsigned Alignment = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && @@ -985,10 +691,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; SDValue Result = - DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Chain, Ptr, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, isInvariant, Alignment, - AAInfo); + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); Ch = Result.getValue(1); // The chain. @@ -1023,10 +727,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { if (DL.isLittleEndian()) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), - Chain, Ptr, - LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, isInvariant, Alignment, AAInfo); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, + AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1035,8 +738,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), AAInfo); + ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, + AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1056,19 +759,18 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, isInvariant, Alignment, AAInfo); + LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, + AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, - dl, Node->getValueType(0), Chain, Ptr, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), AAInfo); + ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, + AAInfo); // Build a factor node to remember that this load is independent of // the other one. 
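// ---- Editor's aside (illustration only; not part of this patch). ----
// The EXTLOAD:i24 expansion above, sketched on a little-endian host:
// ZEXTLOAD the low 16 bits, load the remaining 8 bits at offset
// RoundWidth / 8, then recombine with SHL and OR. The helper name is
// hypothetical.
#include <cstdint>
#include <cstring>
static uint32_t loadI24LE(const uint8_t *Ptr) {
  uint16_t Lo = 0;
  uint8_t Hi = 0;
  std::memcpy(&Lo, Ptr, 2);     // load the bottom RoundWidth bits
  std::memcpy(&Hi, Ptr + 2, 1); // load the remaining ExtraWidth bits
  return static_cast<uint32_t>(Lo) | (static_cast<uint32_t>(Hi) << 16);
}
// ---- Aside ends. ----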
@@ -1099,8 +801,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = SDValue(Node, 1); if (isCustom) { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) { + if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { Value = Res; Chain = Res.getValue(1); } @@ -1111,8 +812,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) - ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG); + } } break; } @@ -1297,6 +999,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: + case ISD::EH_DWARF_CFA: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: case ISD::EH_SJLJ_SETUP_DISPATCH: @@ -1399,8 +1102,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case TargetLowering::Custom: { // FIXME: The handling for custom lowering with multiple results is // a complete mess. - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res.getNode()) { + if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { if (!(Res.getNode() != Node || Res.getResNo() != 0)) return; @@ -1467,7 +1169,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // Caches for hasPredecessorHelper SmallPtrSet<const SDNode *, 32> Visited; SmallVector<const SDNode *, 16> Worklist; - + Worklist.push_back(Idx.getNode()); SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), UE = Vec.getNode()->use_end(); UI != UE; ++UI) { @@ -1485,7 +1187,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // If the index is dependent on the store we will introduce a cycle when // creating the load (the load uses the index, and by replacing the chain // we will make the index dependent on the load). - if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist)) + if (SDNode::hasPredecessorHelper(ST, Visited, Worklist)) continue; StackPtr = ST->getBasePtr(); @@ -1498,7 +1200,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // Store the value to a temporary stack slot, then LOAD the returned part. StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo()); } // Add the offset to the index. @@ -1513,12 +1215,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { SDValue NewLoad; if (Op.getValueType().isVector()) - NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, - MachinePointerInfo(), false, false, false, 0); + NewLoad = + DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo()); else - NewLoad = DAG.getExtLoad( - ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), - Vec.getValueType().getVectorElementType(), false, false, false, 0); + NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + MachinePointerInfo(), + Vec.getValueType().getVectorElementType()); // Replace the chain going out of the store, by the one out of the load. 
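// ---- Editor's aside (illustration only; not part of this patch). ----
// The store-then-load idea behind ExpandExtractFromVectorThroughStack,
// reduced to plain C++ for a hypothetical 4 x i32 vector: spill the whole
// vector to a stack temporary, then index into it to read one element.
#include <cstdint>
#include <cstring>
static int32_t extractViaStack(const int32_t (&Vec)[4], unsigned Idx) {
  int32_t Slot[4];                      // CreateStackTemporary analogue
  std::memcpy(Slot, Vec, sizeof(Slot)); // store the whole vector
  return Slot[Idx];                     // add the offset, load the element
}
// ---- Aside ends; the chain fix-up continues below. ----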
DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); @@ -1549,8 +1251,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // First store the whole vector. - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, - false, false, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Then store the inserted part. @@ -1566,12 +1267,10 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { StackPtr); // Store the subvector. - Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, - MachinePointerInfo(), false, false, 0); + Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo()); // Finally, load the updated vector. - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, - false, false, false, 0); + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1593,7 +1292,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { // Ignore undef elements. - if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (Node->getOperand(i).isUndef()) continue; unsigned Offset = TypeByteSize*i; @@ -1605,13 +1304,10 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, - PtrInfo.getWithOffset(Offset), - EltVT, false, false, 0)); + PtrInfo.getWithOffset(Offset), EltVT)); } else - Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, - PtrInfo.getWithOffset(Offset), - false, false, 0)); + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), + Idx, PtrInfo.getWithOffset(Offset))); } SDValue StoreChain; @@ -1621,8 +1317,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, - false, false, false, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo); } namespace { @@ -1645,7 +1340,8 @@ struct FloatSignAsInt { /// containing the sign bit if the target has no integer value capable of /// holding all bits of the floating-point value. 
void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, - SDLoc DL, SDValue Value) const { + const SDLoc &DL, + SDValue Value) const { EVT FloatVT = Value.getValueType(); unsigned NumBits = FloatVT.getSizeInBits(); State.FloatVT = FloatVT; @@ -1669,7 +1365,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, MachineFunction &MF = DAG.getMachineFunction(); State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, - State.FloatPointerInfo, false, false, 0); + State.FloatPointerInfo); SDValue IntPtr; if (DataLayout.isBigEndian()) { @@ -1687,9 +1383,8 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, } State.IntPtr = IntPtr; - State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, - IntPtr, State.IntPointerInfo, MVT::i8, - false, false, false, 0); + State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, IntPtr, + State.IntPointerInfo, MVT::i8); State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); State.SignBit = 7; } @@ -1697,16 +1392,16 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, /// Replace the integer value produced by getSignAsIntValue() with a new value /// and cast the result back to a floating-point type. SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, - SDLoc DL, SDValue NewIntValue) const { + const SDLoc &DL, + SDValue NewIntValue) const { if (!State.Chain) return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); // Override the part containing the sign bit in the value stored on the stack. SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, - State.IntPointerInfo, MVT::i8, false, false, - 0); + State.IntPointerInfo, MVT::i8); return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, - State.FloatPointerInfo, false, false, false, 0); + State.FloatPointerInfo); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { @@ -1843,11 +1538,10 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// of a true/false result. /// /// \returns true if the SetCC has been legalized, false if it hasn't. -bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, - SDValue &LHS, SDValue &RHS, - SDValue &CC, +bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, + SDValue &RHS, SDValue &CC, bool &NeedInvert, - SDLoc dl) { + const SDLoc &dl) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; @@ -1944,10 +1638,8 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, /// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does /// a load from the stack slot to DestVT, extending it if needed. /// The resultant code need not be legal. -SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, - EVT SlotVT, - EVT DestVT, - SDLoc dl) { +SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT, + EVT DestVT, const SDLoc &dl) { // Create the stack frame object. 
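// ---- Editor's aside (illustration only; not part of this patch). ----
// When the float fits in an integer register, the FCOPYSIGN machinery
// above reduces to plain bit surgery with State.SignMask. A minimal f64
// sketch assuming IEEE-754 layout; the helper name is hypothetical.
#include <cstdint>
#include <cstring>
static double copySignViaInt(double Mag, double Sgn) {
  uint64_t M, S;
  std::memcpy(&M, &Mag, 8);
  std::memcpy(&S, &Sgn, 8);
  const uint64_t SignMask = 1ULL << 63;          // State.SignMask analogue
  uint64_t R = (M & ~SignMask) | (S & SignMask); // splice in the new sign
  std::memcpy(&Mag, &R, 8);                      // modifySignAsInt analogue
  return Mag;
}
// ---- Aside ends; EmitStackConvert continues below. ----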
unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); @@ -1969,22 +1661,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, SDValue Store; if (SrcSize > SlotSize) - Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - PtrInfo, SlotVT, false, false, SrcAlign); + Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, + SlotVT, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); - Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - PtrInfo, false, false, SrcAlign); + Store = + DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) - return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, - false, false, false, DestAlign); + return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); - return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, - PtrInfo, SlotVT, false, false, false, DestAlign); + return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT, + DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1999,11 +1690,10 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue Ch = DAG.getTruncStore( DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), - Node->getValueType(0).getVectorElementType(), false, false, 0); + Node->getValueType(0).getVectorElementType()); return DAG.getLoad( Node->getValueType(0), dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, - false, false, 0); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); } static bool @@ -2025,7 +1715,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, NewIntermedVals; for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); - if (V.getOpcode() == ISD::UNDEF) + if (V.isUndef()) continue; SDValue Vec; @@ -2044,7 +1734,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, SmallVector<int, 16> FinalIndices; FinalIndices.reserve(IntermedVals[i].second.size() + IntermedVals[i+1].second.size()); - + int k = 0; for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f; ++j, ++k) { @@ -2061,7 +1751,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, if (Phase) Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first, IntermedVals[i+1].first, - ShuffleVec.data()); + ShuffleVec); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; NewIntermedVals.push_back( @@ -2092,7 +1782,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, ShuffleVec[IntermedVals[1].second[i]] = NumElems + i; if (Phase) - Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; } @@ -2117,7 +1807,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { bool isConstant = true; for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); - if (V.getOpcode() == ISD::UNDEF) + if (V.isUndef()) continue; if (i > 0) isOnlyLowElement = false; @@ -2160,7 +1850,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { CI->getZExtValue())); } } else { - assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); + assert(Node->getOperand(i).isUndef()); Type *OpNTy = 
EltVT.getTypeForEVT(*DAG.getContext()); CV.push_back(UndefValue::get(OpNTy)); } @@ -2171,13 +1861,13 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad( VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, Alignment); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + Alignment); } SmallSet<SDValue, 16> DefinedValues; for (unsigned i = 0; i < NumElems; ++i) { - if (Node->getOperand(i).getOpcode() == ISD::UNDEF) + if (Node->getOperand(i).isUndef()) continue; DefinedValues.insert(Node->getOperand(i)); } @@ -2187,7 +1877,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { SmallVector<int, 8> ShuffleVec(NumElems, -1); for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); - if (V.getOpcode() == ISD::UNDEF) + if (V.isUndef()) continue; ShuffleVec[i] = V == Value1 ? 0 : NumElems; } @@ -2201,7 +1891,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { Vec2 = DAG.getUNDEF(VT); // Return shuffle(LowValVec, undef, <0,0,0,0>) - return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec); } } else { SDValue Res; @@ -2243,15 +1933,18 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue InChain = DAG.getEntryNode(); // isTailCall may be true since the callee does not reference caller stack - // frame. Check if it's in the right position. + // frame. Check if it's in the right position and that the return types match. SDValue TCChain = InChain; - bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain); + const Function *F = DAG.getMachineFunction().getFunction(); + bool isTailCall = + TLI.isInTailCallPosition(DAG, Node, TCChain) && + (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy()); if (isTailCall) InChain = TCChain; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2267,7 +1960,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, /// and returning a result of type RetVT. 
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, - bool isSigned, SDLoc dl) { + bool isSigned, const SDLoc &dl) { TargetLowering::ArgListTy Args; Args.reserve(NumOps); @@ -2286,7 +1979,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2320,7 +2013,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2415,14 +2108,14 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, - MachinePointerInfo(), false, false, false, 0); + SDValue Rem = + DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo()); Results.push_back(CallInfo.first); Results.push_back(Rem); } @@ -2449,8 +2142,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, return false; // GNU sin/cos functions set errno while sincos does not. Therefore // combining sin and cos is only safe if unsafe-fpmath is enabled. - bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU; - if (isGNU && !TM.Options.UnsafeFPMath) + if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath) return false; return true; } @@ -2528,26 +2220,25 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) .setCallee(TLI.getLibcallCallingConv(LC), - Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0); + Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args)); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, - MachinePointerInfo(), false, false, false, 0)); - Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, - MachinePointerInfo(), false, false, false, 0)); + Results.push_back( + DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, MachinePointerInfo())); + Results.push_back( + DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo())); } /// This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are /// legal for the target. 
-SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, - SDValue Op0, +SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, - SDLoc dl) { + const SDLoc &dl) { // TODO: Should any fast-math-flags be set for the created nodes? - + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2574,18 +2265,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Op0Mapped = Op0; } // store the lo of the constructed double - based on integer input - SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, - Op0Mapped, Lo, MachinePointerInfo(), - false, false, 0); + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo, + MachinePointerInfo()); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); // store the hi of the constructed double - biased exponent - SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, - MachinePointerInfo(), - false, false, 0); + SDValue Store2 = + DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo()); // load the constructed double - SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, - MachinePointerInfo(), false, false, false, 0); + SDValue Load = + DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo()); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2733,13 +2422,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad( MVT::f32, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, - false, false, Alignment); + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + Alignment); else { SDValue Load = DAG.getExtLoad( ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, - false, false, false, Alignment); + Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); @@ -2753,10 +2442,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP /// operation that takes a larger input. -SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, - EVT DestVT, +SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, - SDLoc dl) { + const SDLoc &dl) { // First step, figure out the appropriate *INT_TO_FP operation to use. EVT NewInTy = LegalOp.getValueType(); @@ -2795,10 +2483,9 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT /// operation that returns a larger result. -SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, - EVT DestVT, +SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, - SDLoc dl) { + const SDLoc &dl) { // First step, figure out the appropriate FP_TO*INT operation to use. EVT NewOutTy = DestVT; @@ -2835,11 +2522,11 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, } /// Open code the operations for BITREVERSE. 
-SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { +SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Sz = VT.getScalarSizeInBits(); - + SDValue Tmp, Tmp2; Tmp = DAG.getConstant(0, dl, VT); for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { @@ -2849,7 +2536,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { else Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); - + APInt Shift(Sz, 1); Shift = Shift.shl(J); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); @@ -2860,7 +2547,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { } /// Open code the operations for BSWAP of the specified operation. -SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; @@ -2914,7 +2601,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { /// Expand the specified bitcount instruction into operations. SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, - SDLoc dl) { + const SDLoc &dl) { switch (Opc) { default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { @@ -3046,6 +2733,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::FRAME_TO_ARGS_OFFSET: Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); break; + case ISD::EH_DWARF_CFA: { + SDValue CfaArg = DAG.getSExtOrTrunc(Node->getOperand(0), dl, + TLI.getPointerTy(DAG.getDataLayout())); + SDValue Offset = DAG.getNode(ISD::ADD, dl, + CfaArg.getValueType(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, + CfaArg.getValueType()), + CfaArg); + SDValue FA = DAG.getNode( + ISD::FRAMEADDR, dl, TLI.getPointerTy(DAG.getDataLayout()), + DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()))); + Results.push_back(DAG.getNode(ISD::ADD, dl, FA.getValueType(), + FA, Offset)); + break; + } case ISD::FLT_ROUNDS_: Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); break; @@ -3111,10 +2813,38 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { cast<AtomicSDNode>(Node)->getFailureOrdering(), cast<AtomicSDNode>(Node)->getSynchScope()); - SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1), - Res, Node->getOperand(2), ISD::SETEQ); + SDValue ExtRes = Res; + SDValue LHS = Res; + SDValue RHS = Node->getOperand(1); - Results.push_back(Res.getValue(0)); + EVT AtomicType = cast<AtomicSDNode>(Node)->getMemoryVT(); + EVT OuterType = Node->getValueType(0); + switch (TLI.getExtendForAtomicOps()) { + case ISD::SIGN_EXTEND: + LHS = DAG.getNode(ISD::AssertSext, dl, OuterType, Res, + DAG.getValueType(AtomicType)); + RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OuterType, + Node->getOperand(2), DAG.getValueType(AtomicType)); + ExtRes = LHS; + break; + case ISD::ZERO_EXTEND: + LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res, + DAG.getValueType(AtomicType)); + RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); + ExtRes = LHS; + break; + case ISD::ANY_EXTEND: + LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType); + RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); + break; + default: + llvm_unreachable("Invalid atomic op extension"); + } + + SDValue Success = + 
DAG.getSetCC(dl, Node->getValueType(1), LHS, RHS, ISD::SETEQ); + + Results.push_back(ExtRes.getValue(0)); Results.push_back(Success); Results.push_back(Res.getValue(1)); break; @@ -3400,7 +3130,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - + case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); @@ -3442,7 +3172,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, DAG.getIntPtrConstant(0, dl)); Results.push_back( - DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); + DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal)); } } break; @@ -3760,10 +3490,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad( ISD::SEXTLOAD, dl, PTy, Chain, Addr, - MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT, - false, false, false, 0); + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT); Addr = LD; - if (TM.getRelocationModel() == Reloc::PIC_) { + if (TM.isPositionIndependent()) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. @@ -3786,7 +3515,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(2)); } else { // We test only the i1 bit. Skip the AND if UNDEF. - Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 : + Tmp3 = (Tmp2.isUndef()) ? Tmp2 : DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, DAG.getConstant(1, dl, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, @@ -4008,7 +3737,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); + std::move(Args)); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); @@ -4031,7 +3760,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_CMP_SWAP: { MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); @@ -4048,7 +3777,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("abort", TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); + std::move(Args)); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -4269,18 +3998,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::CTPOP: // Zero extend the argument. Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + if (Node->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + auto TopBit = APInt::getOneBitSet(NVT.getSizeInBits(), + OVT.getSizeInBits()); + Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1, + DAG.getConstant(TopBit, dl, NVT)); + } // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is // already the correct result. 
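// ---- Editor's aside (illustration only; not part of this patch). ----
// Why the OR with TopBit above suffices when promoting CTTZ from i16 to
// i32: a set bit just past the original width caps the count at 16 for a
// zero input and is invisible otherwise. Hypothetical host sketch using
// the GCC/Clang builtin __builtin_ctz, which is well defined here since
// the operand is guaranteed nonzero after the OR.
#include <cstdint>
static unsigned cttz16ViaPromotion(uint16_t X) {
  uint32_t Wide = static_cast<uint32_t>(X) | (1u << 16); // the TopBit trick
  return static_cast<unsigned>(__builtin_ctz(Wide));     // 16 when X == 0
}
// ---- Aside ends; the promoted operation is emitted below. ----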
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); - if (Node->getOpcode() == ISD::CTTZ) { - // FIXME: This should set a bit in the zero extended value instead. - Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), - Tmp1, DAG.getConstant(NVT.getSizeInBits(), dl, NVT), - ISD::SETEQ); - Tmp1 = DAG.getSelect(dl, NVT, Tmp2, - DAG.getConstant(OVT.getSizeInBits(), dl, NVT), Tmp1); - } else if (Node->getOpcode() == ISD::CTLZ || - Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { + if (Node->getOpcode() == ISD::CTLZ || + Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6c0193a..31ebf7b 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -149,9 +149,26 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { if (isLegalInHWReg(N->getValueType(ResNo))) return SDValue(N, ResNo); ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); - return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), - TLI.getTypeToTransformTo(*DAG.getContext(), - CN->getValueType(0))); + // In ppcf128, the high 64 bits are always first in memory regardless + // of Endianness. LLVM's APFloat representation is not Endian sensitive, + // and so always converts into a 128-bit APInt in a non-Endian-sensitive + // way. However, APInt's are serialized in an Endian-sensitive fashion, + // so on big-Endian targets, the two doubles are output in the wrong + // order. Fix this by manually flipping the order of the high 64 bits + // and the low 64 bits here. + if (DAG.getDataLayout().isBigEndian() && + CN->getValueType(0).getSimpleVT() == llvm::MVT::ppcf128) { + uint64_t words[2] = { CN->getValueAPF().bitcastToAPInt().getRawData()[1], + CN->getValueAPF().bitcastToAPInt().getRawData()[0] }; + APInt Val(128, words); + return DAG.getConstant(Val, SDLoc(CN), + TLI.getTypeToTransformTo(*DAG.getContext(), + CN->getValueType(0))); + } else { + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), + TLI.getTypeToTransformTo(*DAG.getContext(), + CN->getValueType(0))); + } } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -614,12 +631,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDLoc dl(N); + auto MMOFlags = + L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { - NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), - NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment(), + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, + L->getChain(), L->getBasePtr(), L->getOffset(), + L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags, L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -629,12 +647,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { } // Do a non-extending load followed by FP_EXTEND. 
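// ---- Editor's aside (illustration only; not part of this patch). ----
// The ppcf128 constant fix above, in miniature: APInt serializes its
// 64-bit words low-word-first, but ppcf128 keeps the high double first in
// memory, so on big-endian targets the two words are swapped by hand
// before the constant is emitted. Hypothetical sketch over raw words:
#include <cstdint>
static void flipPPCF128Words(const uint64_t Raw[2], uint64_t Out[2]) {
  Out[0] = Raw[1]; // the high double's bits come first
  Out[1] = Raw[0]; // the low double's bits follow
}
// ---- Aside ends; the non-extending-load path continues below. ----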
- NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, - L->getMemoryVT(), dl, L->getChain(), - L->getBasePtr(), L->getOffset(), L->getPointerInfo(), - L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment(), - L->getAAInfo()); + NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(), + dl, L->getChain(), L->getBasePtr(), L->getOffset(), + L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(), + MMOFlags, L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -800,6 +816,7 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FCOPYSIGN: case ISD::FNEG: case ISD::Register: + case ISD::SELECT: return true; } return false; @@ -1516,7 +1533,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - SDLoc dl) { + const SDLoc &dl) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedFloat(NewLHS, LHSLo, LHSHi); GetExpandedFloat(NewRHS, RHSLo, RHSHi); @@ -1868,6 +1885,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { // Binary FP Operations case ISD::FADD: case ISD::FDIV: + case ISD::FMAXNAN: + case ISD::FMINNAN: case ISD::FMAXNUM: case ISD::FMINNUM: case ISD::FMUL: @@ -2063,13 +2082,14 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); - // Load the value as an integer value with the same number of bits + // Load the value as an integer value with the same number of bits. EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), - IVT, SDLoc(N), L->getChain(), L->getBasePtr(), - L->getOffset(), L->getPointerInfo(), IVT, L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment(), - L->getAAInfo()); + auto MMOFlags = + L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; + SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT, + SDLoc(N), L->getChain(), L->getBasePtr(), + L->getOffset(), L->getPointerInfo(), IVT, + L->getAlignment(), MMOFlags, L->getAAInfo()); // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); @@ -2102,9 +2122,14 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) { // Construct a SDNode that transforms the SINT or UINT operand to the promoted // float type. SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) { + SDLoc DL(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, N->getOperand(0)); + SDValue NV = DAG.getNode(N->getOpcode(), DL, NVT, N->getOperand(0)); + // Round the value to the desired precision (that of the source type). 
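// ---- Editor's aside (illustration only; not part of this patch). ----
// Why the FP_ROUND/FP_EXTEND pair below matters: converting an integer
// directly at the promoted type can retain more precision than the
// original type has. A host analogue with f32 promoted to f64; the
// helper name is hypothetical.
#include <cstdint>
static double xintToFPPromoted(int64_t N) {
  double Wide = static_cast<double>(N);    // *INT_TO_FP at the promoted type
  float Narrow = static_cast<float>(Wide); // FP_ROUND to the source precision
  return static_cast<double>(Narrow);      // FP_EXTEND back to the promoted type
}
// ---- Aside ends; the rounding return follows below. ----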
+ return DAG.getNode( + ISD::FP_EXTEND, DL, NVT, + DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL))); } SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 74f80db..9a18943 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -436,10 +436,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); - - return DAG.getNode(ISD::AssertZext, dl, - NVT, Res, DAG.getValueType(N->getValueType(0))); + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); } SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { @@ -1374,6 +1371,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::OR: case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; + case ISD::UMAX: + case ISD::SMAX: + case ISD::UMIN: + case ISD::SMIN: ExpandIntRes_MINMAX(N, Lo, Hi); break; + case ISD::ADD: case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break; @@ -1404,7 +1406,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); @@ -1442,15 +1444,6 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, } else if (Amt == NVTBits) { Lo = DAG.getConstant(0, DL, NVT); Hi = InL; - } else if (Amt == 1 && - TLI.isOperationLegalOrCustom(ISD::ADDC, - TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { - // Emit this X << 1 as X+X. - SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); - SDValue LoOps[2] = { InL, InL }; - Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps); - SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; - Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps); } else { Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)); Hi = DAG.getNode(ISD::OR, DL, NVT, @@ -1675,6 +1668,54 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { } } +static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) { + + switch (Op) { + default: llvm_unreachable("invalid min/max opcode"); + case ISD::SMAX: + return std::make_pair(ISD::SETGT, ISD::UMAX); + case ISD::UMAX: + return std::make_pair(ISD::SETUGT, ISD::UMAX); + case ISD::SMIN: + return std::make_pair(ISD::SETLT, ISD::UMIN); + case ISD::UMIN: + return std::make_pair(ISD::SETULT, ISD::UMIN); + } +} + +void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc DL(N); + ISD::NodeType LoOpc; + ISD::CondCode CondC; + std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode()); + + // Expand the subcomponents. 
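// ---- Editor's aside (illustration only; not part of this patch). ----
// The expansion below, sketched on the host for SMAX of an i128 split
// into i64 halves: the Hi parts decide with a signed compare, and only a
// Hi tie falls through to an unsigned compare of the Lo parts. The helper
// name and the reference-parameter interface are hypothetical.
#include <cstdint>
static void smax128(int64_t AH, uint64_t AL, int64_t BH, uint64_t BL,
                    int64_t &ResH, uint64_t &ResL) {
  bool IsHiLeft = AH > BH;             // SETGT on the Hi parts
  bool IsHiEq = AH == BH;
  ResH = IsHiLeft ? AH : BH;           // same op (SMAX) on the Hi parts
  uint64_t LoCmp = IsHiLeft ? AL : BL; // Lo of the 'winning' Hi part
  uint64_t LoMax = AL > BL ? AL : BL;  // unsigned UMAX on the Lo parts
  ResL = IsHiEq ? LoMax : LoCmp;
}
// ---- Aside ends; the DAG form of the same selects follows below. ----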
+ SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + + // Value types + EVT NVT = LHSL.getValueType(); + EVT CCT = getSetCCResultType(NVT); + + // Hi part is always the same op + Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH}); + + // We need to know whether to select Lo part that corresponds to 'winning' + // Hi part or if Hi parts are equal. + SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC); + SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ); + + // Lo part corresponding to the 'winning' Hi part + SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL); + + // Recursed Lo part if Hi parts are equal, this uses unsigned version + SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL}); + + Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp); +} + void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -2006,9 +2047,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); unsigned Alignment = N->getAlignment(); - bool isVolatile = N->isVolatile(); - bool isNonTemporal = N->isNonTemporal(); - bool isInvariant = N->isInvariant(); + MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); @@ -2017,9 +2056,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (N->getMemoryVT().bitsLE(NVT)) { EVT MemVT = N->getMemoryVT(); - Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), - MemVT, isVolatile, isNonTemporal, isInvariant, - Alignment, AAInfo); + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT, + Alignment, MMOFlags, AAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -2041,8 +2079,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } else if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. - Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment, + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags, AAInfo); unsigned ExcessBits = @@ -2055,8 +2092,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), AAInfo); + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -2074,8 +2110,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, isNonTemporal, isInvariant, Alignment, - AAInfo); + Alignment, MMOFlags, AAInfo); // Increment the pointer to the other half. 
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -2084,8 +2119,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), AAInfo); + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -2145,7 +2179,54 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, LC = RTLIB::MUL_I64; else if (VT == MVT::i128) LC = RTLIB::MUL_I128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); + + if (LC == RTLIB::UNKNOWN_LIBCALL) { + // We'll expand the multiplication by brute force because we have no other + // options. This is a trivially-generalized version of the code from + // Hacker's Delight (itself derived from Knuth's Algorithm M from section + // 4.3.1). + unsigned Bits = NVT.getSizeInBits(); + unsigned HalfBits = Bits >> 1; + SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, + NVT); + SDValue LLL = DAG.getNode(ISD::AND, dl, NVT, LL, Mask); + SDValue RLL = DAG.getNode(ISD::AND, dl, NVT, RL, Mask); + + SDValue T = DAG.getNode(ISD::MUL, dl, NVT, LLL, RLL); + SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask); + + EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); + if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) { + // The type from TLI is too small to fit the shift amount we want. + // Override it with i32. The shift will have to be legalized. + ShiftAmtTy = MVT::i32; + } + SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy); + SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift); + SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); + SDValue RLH = DAG.getNode(ISD::SRL, dl, NVT, RL, Shift); + + SDValue U = DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, LLH, RLL), TH); + SDValue UL = DAG.getNode(ISD::AND, dl, NVT, U, Mask); + SDValue UH = DAG.getNode(ISD::SRL, dl, NVT, U, Shift); + + SDValue V = DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, LLL, RLH), UL); + SDValue VH = DAG.getNode(ISD::SRL, dl, NVT, V, Shift); + + SDValue W = DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, LLH, RLH), + DAG.getNode(ISD::ADD, dl, NVT, UH, VH)); + Lo = DAG.getNode(ISD::ADD, dl, NVT, TL, + DAG.getNode(ISD::SHL, dl, NVT, V, Shift)); + + Hi = DAG.getNode(ISD::ADD, dl, NVT, W, + DAG.getNode(ISD::ADD, dl, NVT, + DAG.getNode(ISD::MUL, dl, NVT, RH, LL), + DAG.getNode(ISD::MUL, dl, NVT, RL, LH))); + return; + } SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, @@ -2495,9 +2576,9 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Temp = DAG.CreateStackTemporary(PtrVT); // Temporary for the overflow value, default it to zero. 
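// ---- Editor's aside (illustration only; not part of this patch). ----
// The brute-force MUL expansion above, instantiated on the host for an
// i128 product from i64 halves, keeping the patch's variable names
// (Knuth's Algorithm M, via Hacker's Delight). The helper name and the
// reference-parameter interface are hypothetical.
#include <cstdint>
static void mulExpanded(uint64_t LL, uint64_t LH, uint64_t RL, uint64_t RH,
                        uint64_t &Lo, uint64_t &Hi) {
  const unsigned HalfBits = 32;
  const uint64_t Mask = 0xFFFFFFFFu;
  uint64_t LLL = LL & Mask, RLL = RL & Mask;
  uint64_t T = LLL * RLL, TL = T & Mask, TH = T >> HalfBits;
  uint64_t LLH = LL >> HalfBits, RLH = RL >> HalfBits;
  uint64_t U = LLH * RLL + TH;
  uint64_t UL = U & Mask, UH = U >> HalfBits;
  uint64_t V = LLL * RLH + UL, VH = V >> HalfBits;
  uint64_t W = LLH * RLH + UH + VH;
  Lo = TL + (V << HalfBits);  // low half of the full product
  Hi = W + RH * LL + RL * LH; // high half, wrapping mod 2^64
}
// ---- Aside ends; ExpandIntRes_XMULO continues below. ----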
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, - DAG.getConstant(0, dl, PtrVT), Temp, - MachinePointerInfo(), false, false, 0); + SDValue Chain = + DAG.getStore(DAG.getEntryNode(), dl, DAG.getConstant(0, dl, PtrVT), Temp, + MachinePointerInfo()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -2522,14 +2603,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args)) .setSExtResult(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); SplitInteger(CallInfo.first, Lo, Hi); - SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, - MachinePointerInfo(), false, false, false, 0); + SDValue Temp2 = + DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, MachinePointerInfo()); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, DAG.getConstant(0, dl, PtrVT), ISD::SETNE); @@ -2703,7 +2784,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - SDLoc dl) { + const SDLoc &dl) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedInteger(NewLHS, LHSLo, LHSHi); GetExpandedInteger(NewRHS, RHSLo, RHSHi); @@ -2956,8 +3037,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); unsigned Alignment = N->getAlignment(); - bool isVolatile = N->isVolatile(); - bool isNonTemporal = N->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); SDValue Lo, Hi; @@ -2967,16 +3047,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - N->getMemoryVT(), isVolatile, isNonTemporal, - Alignment, AAInfo); + N->getMemoryVT(), Alignment, MMOFlags, AAInfo); } if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); - Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, AAInfo); + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, + AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2986,10 +3065,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, - N->getPointerInfo().getWithOffset(IncrementSize), - NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), AAInfo); + Hi = DAG.getTruncStore( + Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -3017,8 +3095,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { } // Store both the high bits and maybe some of the low bits. 
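// ---- Editor's aside (illustration only; not part of this patch). ----
// The little-endian path of ExpandIntOp_STORE above, for an i128 value
// expanded into two i64 halves: low half at the low address, high half at
// Ptr plus IncrementSize. Hypothetical sketch assuming a little-endian
// host.
#include <cstdint>
#include <cstring>
static void storeExpandedI128(uint8_t *Ptr, uint64_t Lo, uint64_t Hi) {
  std::memcpy(Ptr, &Lo, 8);     // low bits at the low address
  std::memcpy(Ptr + 8, &Hi, 8); // IncrementSize = 64 / 8 bytes further on
}
// ---- Aside ends; the high-bits store follows below. ----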
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), - HiVT, isVolatile, isNonTemporal, Alignment, AAInfo); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment, + MMOFlags, AAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -3027,8 +3105,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), AAInfo); + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -3104,7 +3181,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Fudge = DAG.getExtLoad( ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, - false, false, false, Alignment); + Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 2a0b0aa..144bed2 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -27,7 +27,7 @@ using namespace llvm; static cl::opt<bool> EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); -/// PerformExpensiveChecks - Do extensive, expensive, sanity checking. +/// Do extensive, expensive, sanity checking. void DAGTypeLegalizer::PerformExpensiveChecks() { // If a node is not processed, then none of its values should be mapped by any // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. @@ -174,9 +174,9 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { } } -/// run - This is the main entry point for the type legalizer. This does a -/// top-down traversal of the dag, legalizing types as it goes. Returns "true" -/// if it made any changes. +/// This is the main entry point for the type legalizer. This does a top-down +/// traversal of the dag, legalizing types as it goes. Returns "true" if it made +/// any changes. bool DAGTypeLegalizer::run() { bool Changed = false; @@ -204,7 +204,7 @@ bool DAGTypeLegalizer::run() { // Now that we have a set of nodes to process, handle them all. while (!Worklist.empty()) { -#ifndef XDEBUG +#ifndef EXPENSIVE_CHECKS if (EnableExpensiveChecks) #endif PerformExpensiveChecks(); @@ -394,7 +394,7 @@ NodeDone: } } -#ifndef XDEBUG +#ifndef EXPENSIVE_CHECKS if (EnableExpensiveChecks) #endif PerformExpensiveChecks(); @@ -461,11 +461,10 @@ NodeDone: return Changed; } -/// AnalyzeNewNode - The specified node is the root of a subtree of potentially -/// new nodes. Correct any processed operands (this may change the node) and -/// calculate the NodeId. If the node itself changes to a processed node, it -/// is not remapped - the caller needs to take care of this. -/// Returns the potentially changed node. +/// The specified node is the root of a subtree of potentially new nodes. +/// Correct any processed operands (this may change the node) and calculate the +/// NodeId. If the node itself changes to a processed node, it is not remapped - +/// the caller needs to take care of this. Returns the potentially changed node. SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // If this was an existing node that is already done, we're done. 
if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) @@ -536,7 +535,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { return N; } -/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed. +/// Call AnalyzeNewNode, updating the node in Val if needed. /// If the node changes to a processed node, then remap it. void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { Val.setNode(AnalyzeNewNode(Val.getNode())); @@ -545,7 +544,7 @@ void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { RemapValue(Val); } -/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it. +/// If N has a bogus mapping in ReplacedValues, eliminate it. /// This can occur when a node is deleted then reallocated as a new node - /// the mapping in ReplacedValues applies to the deleted node, not the new /// one. @@ -626,7 +625,7 @@ void DAGTypeLegalizer::ExpungeNode(SDNode *N) { ReplacedValues.erase(SDValue(N, i)); } -/// RemapValue - If the specified value was already legalized to another value, +/// If the specified value was already legalized to another value, /// replace it by that value. void DAGTypeLegalizer::RemapValue(SDValue &N) { DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); @@ -643,8 +642,8 @@ void DAGTypeLegalizer::RemapValue(SDValue &N) { } namespace { - /// NodeUpdateListener - This class is a DAGUpdateListener that listens for - /// updates to nodes and recomputes their ready state. + /// This class is a DAGUpdateListener that listens for updates to nodes and + /// recomputes their ready state. class NodeUpdateListener : public SelectionDAG::DAGUpdateListener { DAGTypeLegalizer &DTL; SmallSetVector<SDNode*, 16> &NodesToAnalyze; @@ -689,9 +688,8 @@ namespace { } -/// ReplaceValueWith - The specified value was legalized to the specified other -/// value. Update the DAG and NodeIds replacing any uses of From to use To -/// instead. +/// The specified value was legalized to the specified other value. +/// Update the DAG and NodeIds replacing any uses of From to use To instead. void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { assert(From.getNode() != To.getNode() && "Potential legalization loop!"); @@ -905,15 +903,14 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { // Utilities. //===----------------------------------------------------------------------===// -/// BitConvertToInteger - Convert to an integer of the same size. +/// Convert to an integer of the same size. SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { unsigned BitWidth = Op.getValueType().getSizeInBits(); return DAG.getNode(ISD::BITCAST, SDLoc(Op), EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } -/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the -/// same size. +/// Convert to a vector of integers of the same size. SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { assert(Op.getValueType().isVector() && "Only applies to vectors!"); unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); @@ -930,15 +927,14 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, // the source and destination types. SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, MachinePointerInfo()); // Result is a load from the stack slot. 
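// ---- Editor's aside (illustration only; not part of this patch). ----
// CreateStackStoreLoad above is the DAG form of a memcpy-style bit cast:
// store in the source type, reload in a destination type of the same
// size. Host sketch for f64 -> i64; the helper name is hypothetical.
#include <cstdint>
#include <cstring>
static uint64_t bitcastViaStack(double D) {
  uint64_t Slot;                        // the stack temporary
  std::memcpy(&Slot, &D, sizeof(Slot)); // store in the source type
  return Slot;                          // reload in the destination type
}
// ---- Aside ends; the load from the slot follows below. ----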
-  return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
-                     false, false, false, 0);
+  return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo());
 }

-/// CustomLowerNode - Replace the node's results with custom code provided
-/// by the target and return "true", or do nothing and return "false".
+/// Replace the node's results with custom code provided by the target and
+/// return "true", or do nothing and return "false".
 /// The last parameter is FALSE if we are dealing with a node with legal
 /// result types and illegal operand. The second parameter denotes the type of
 /// illegal OperandNo in that case.
@@ -981,8 +977,8 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
 }


-/// CustomWidenLowerNode - Widen the node's results with custom code provided
-/// by the target and return "true", or do nothing and return "false".
+/// Widen the node's results with custom code provided by the target and return
+/// "true", or do nothing and return "false".
 bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
   // See if the target wants to custom lower this node.
   if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
@@ -992,7 +988,7 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
   TLI.ReplaceNodeResults(N, Results, DAG);

   if (Results.empty())
-    // The target didn't want to custom widen lower its result  after all.
+    // The target didn't want to custom widen lower its result after all.
     return false;

   // Update the widening map.
@@ -1010,8 +1006,8 @@ SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) {
   return SDValue(N->getOperand(ResNo));
 }

-/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
-/// high parts of the given value.
+/// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+/// given value.
 void DAGTypeLegalizer::GetPairElements(SDValue Pair,
                                        SDValue &Lo, SDValue &Hi) {
   SDLoc dl(Pair);
@@ -1038,7 +1034,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
   return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
 }

-/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
+/// Build an integer with low bits Lo and high bits Hi.
 SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
   // Arbitrarily use dlHi for result SDLoc
   SDLoc dlHi(Hi);
@@ -1056,7 +1052,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
   return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
 }

-/// LibCallify - Convert the node into a libcall with the same prototype.
+/// Convert the node into a libcall with the same prototype.
 SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
                                      bool isSigned) {
   unsigned NumOps = N->getNumOperands();
@@ -1080,12 +1076,11 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
   return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first;
 }

-// ExpandChainLibCall - Expand a node into a call to a libcall.  Similar to
-// ExpandLibCall except that the first operand is the in-chain.
+/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that
+/// the first operand is the in-chain.
 std::pair<SDValue, SDValue>
-DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
-                                     SDNode *Node,
-                                     bool isSigned) {
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
+                                     bool isSigned) {
   SDValue InChain = Node->getOperand(0);

   TargetLowering::ArgListTy Args;
@@ -1106,7 +1101,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,

   TargetLowering::CallLoweringInfo CLI(DAG);
   CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
-    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
+    .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
     .setSExtResult(isSigned).setZExtResult(!isSigned);

   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1114,9 +1109,9 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
   return CallInfo;
 }

-/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
-/// of the given type. A target boolean is an integer value, not necessarily of
-/// type i1, the bits of which conform to getBooleanContents.
+/// Promote the given target boolean to a target boolean of the given type.
+/// A target boolean is an integer value, not necessarily of type i1, the bits
+/// of which conform to getBooleanContents.
 ///
 /// ValVT is the type of values that produced the boolean.
 SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
@@ -1127,9 +1122,9 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
   return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
 }

-/// WidenTargetBoolean - Widen the given target boolean to a target boolean
-/// of the given type. The boolean vector is widened and then promoted to match
-/// the target boolean type of the given ValVT.
+/// Widen the given target boolean to a target boolean of the given type.
+/// The boolean vector is widened and then promoted to match the target boolean
+/// type of the given ValVT.
 SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
                                              bool WithZeroes) {
   SDLoc dl(Bool);
@@ -1144,8 +1139,7 @@ SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
   return PromoteTargetBoolean(Bool, ValVT);
 }

-/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
-/// bits in Hi.
+/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
 void DAGTypeLegalizer::SplitInteger(SDValue Op,
                                     EVT LoVT, EVT HiVT,
                                     SDValue &Lo, SDValue &Hi) {
@@ -1159,8 +1153,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
   Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
 }

-/// SplitInteger - Return the lower and upper halves of Op's bits in a value
-/// type half the size of Op's.
+/// Return the lower and upper halves of Op's bits in a value type half the
+/// size of Op's.
 void DAGTypeLegalizer::SplitInteger(SDValue Op,
                                     SDValue &Lo, SDValue &Hi) {
   EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
@@ -1173,9 +1167,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
 // Entry Point
 //===----------------------------------------------------------------------===//

-/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
-/// only uses types natively supported by the target. Returns "true" if it made
-/// any changes.
+/// This transforms the SelectionDAG into a SelectionDAG that only uses types
+/// natively supported by the target. Returns "true" if it made any changes.
 ///
 /// Note that this is an involved process that may invalidate pointers into
 /// the graph.
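[Editor's note] The SplitInteger/JoinIntegers doc comments cleaned up above describe a simple bit-level contract: Lo carries the low bits, Hi the high bits, and joining is a zero-extend, shift, and OR. A minimal standalone C++ sketch of that contract on fixed 64/32-bit widths follows; it uses plain integers rather than SDValues, and splitInteger/joinIntegers are illustrative names only, not the DAG API:

#include <cassert>
#include <cstdint>

// Split a 64-bit value into low/high 32-bit halves, then rejoin them.
static void splitInteger(uint64_t Op, uint32_t &Lo, uint32_t &Hi) {
  Lo = static_cast<uint32_t>(Op);       // truncate keeps the low bits
  Hi = static_cast<uint32_t>(Op >> 32); // shift right, then truncate
}

static uint64_t joinIntegers(uint32_t Lo, uint32_t Hi) {
  // Zero-extend Hi, shift it into place, and OR in the low half.
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

int main() {
  uint32_t Lo, Hi;
  splitInteger(0x0123456789ABCDEFull, Lo, Hi);
  assert(Lo == 0x89ABCDEFu && Hi == 0x01234567u);
  assert(joinIntegers(Lo, Hi) == 0x0123456789ABCDEFull);
}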
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 8ba19f7..84ad8f8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -17,7 +17,6 @@
 #define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H

 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
@@ -26,58 +25,56 @@ namespace llvm {

 //===----------------------------------------------------------------------===//
-/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks
-/// on it until only value types the target machine can handle are left. This
-/// involves promoting small sizes to large sizes or splitting up large values
-/// into small values.
+/// This takes an arbitrary SelectionDAG as input and hacks on it until only
+/// value types the target machine can handle are left. This involves promoting
+/// small sizes to large sizes or splitting up large values into small values.
 ///
 class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   const TargetLowering &TLI;
   SelectionDAG &DAG;
 public:
-  // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information
-  // about the state of the node. The enum has all the values.
+  /// This pass uses the NodeId on the SDNodes to hold information about the
+  /// state of the node. The enum has all the values.
   enum NodeIdFlags {
-    /// ReadyToProcess - All operands have been processed, so this node is ready
-    /// to be handled.
+    /// All operands have been processed, so this node is ready to be handled.
     ReadyToProcess = 0,

-    /// NewNode - This is a new node, not before seen, that was created in the
-    /// process of legalizing some other node.
+    /// This is a new node, not before seen, that was created in the process of
+    /// legalizing some other node.
     NewNode = -1,

-    /// Unanalyzed - This node's ID needs to be set to the number of its
-    /// unprocessed operands.
+    /// This node's ID needs to be set to the number of its unprocessed
+    /// operands.
     Unanalyzed = -2,

-    /// Processed - This is a node that has already been processed.
+    /// This is a node that has already been processed.
     Processed = -3

     // 1+ - This is a node which has this many unprocessed operands.
   };
 private:
-  /// ValueTypeActions - This is a bitvector that contains two bits for each
-  /// simple value type, where the two bits correspond to the LegalizeAction
-  /// enum from TargetLowering. This can be queried with "getTypeAction(VT)".
+  /// This is a bitvector that contains two bits for each simple value type,
+  /// where the two bits correspond to the LegalizeAction enum from
+  /// TargetLowering. This can be queried with "getTypeAction(VT)".
   TargetLowering::ValueTypeActionImpl ValueTypeActions;

-  /// getTypeAction - Return how we should legalize values of this type.
+  /// Return how we should legalize values of this type.
   TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const {
     return TLI.getTypeAction(*DAG.getContext(), VT);
   }

-  /// isTypeLegal - Return true if this type is legal on this target.
+  /// Return true if this type is legal on this target.
   bool isTypeLegal(EVT VT) const {
     return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal;
   }

-  /// isSimpleLegalType - Return true if this is a simple legal type.
+  /// Return true if this is a simple legal type.
   bool isSimpleLegalType(EVT VT) const {
     return VT.isSimple() && TLI.isTypeLegal(VT);
   }

-  /// isLegalInHWReg - Return true if this type can be passed in registers.
+  /// Return true if this type can be passed in registers.
   /// For example, x86_64's f128, should to be legally in registers
   /// and only some operations converted to library calls or integer
   /// bitwise operations.
@@ -90,51 +87,49 @@ private:
     return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
   }

-  /// IgnoreNodeResults - Pretend all of this node's results are legal.
+  /// Pretend all of this node's results are legal.
   bool IgnoreNodeResults(SDNode *N) const {
     return N->getOpcode() == ISD::TargetConstant;
   }

-  /// PromotedIntegers - For integer nodes that are below legal width, this map
-  /// indicates what promoted value to use.
+  /// For integer nodes that are below legal width, this map indicates what
+  /// promoted value to use.
   SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers;

-  /// ExpandedIntegers - For integer nodes that need to be expanded this map
-  /// indicates which operands are the expanded version of the input.
+  /// For integer nodes that need to be expanded this map indicates which
+  /// operands are the expanded version of the input.
   SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers;

-  /// SoftenedFloats - For floating point nodes converted to integers of
-  /// the same size, this map indicates the converted value to use.
+  /// For floating-point nodes converted to integers of the same size, this map
+  /// indicates the converted value to use.
   SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;

-  /// PromotedFloats - For floating point nodes that have a smaller precision
-  /// than the smallest supported precision, this map indicates what promoted
-  /// value to use.
+  /// For floating-point nodes that have a smaller precision than the smallest
+  /// supported precision, this map indicates what promoted value to use.
   SmallDenseMap<SDValue, SDValue, 8> PromotedFloats;

-  /// ExpandedFloats - For float nodes that need to be expanded this map
-  /// indicates which operands are the expanded version of the input.
+  /// For float nodes that need to be expanded this map indicates which operands
+  /// are the expanded version of the input.
   SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;

-  /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
-  /// scalar value of type 'ty' to use.
+  /// For nodes that are <1 x ty>, this map indicates the scalar value of type
+  /// 'ty' to use.
   SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors;

-  /// SplitVectors - For nodes that need to be split this map indicates
-  /// which operands are the expanded version of the input.
+  /// For nodes that need to be split this map indicates which operands are the
+  /// expanded version of the input.
   SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors;

-  /// WidenedVectors - For vector nodes that need to be widened, indicates
-  /// the widened value to use.
+  /// For vector nodes that need to be widened, indicates the widened value to
+  /// use.
   SmallDenseMap<SDValue, SDValue, 8> WidenedVectors;

-  /// ReplacedValues - For values that have been replaced with another,
-  /// indicates the replacement value to use.
+  /// For values that have been replaced with another, indicates the replacement
+  /// value to use.
   SmallDenseMap<SDValue, SDValue, 8> ReplacedValues;

-  /// Worklist - This defines a worklist of nodes to process.  In order to be
-  /// pushed onto this worklist, all operands of a node must have already been
-  /// processed.
+  /// This defines a worklist of nodes to process. In order to be pushed onto
+  /// this worklist, all operands of a node must have already been processed.
   SmallVector<SDNode*, 128> Worklist;

 public:
@@ -145,7 +140,7 @@ public:
            "Too many value types for ValueTypeActions to hold!");
   }

-  /// run - This is the main entry point for the type legalizer. This does a
+  /// This is the main entry point for the type legalizer. This does a
   /// top-down traversal of the dag, legalizing types as it goes. Returns
   /// "true" if it made any changes.
   bool run();
@@ -173,9 +168,9 @@ private:
   bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
   bool CustomWidenLowerNode(SDNode *N, EVT VT);

-  /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES
-  /// node with the corresponding input operand, except for the result 'ResNo',
-  /// for which the corresponding input operand is returned.
+  /// Replace each result of the given MERGE_VALUES node with the corresponding
+  /// input operand, except for the result 'ResNo', for which the corresponding
+  /// input operand is returned.
   SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);

   SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
@@ -201,9 +196,9 @@ private:
   // Integer Promotion Support: LegalizeIntegerTypes.cpp
   //===--------------------------------------------------------------------===//

-  /// GetPromotedInteger - Given a processed operand Op which was promoted to a
-  /// larger integer type, this returns the promoted value. The low bits of the
-  /// promoted value corresponding to the original type are exactly equal to Op.
+  /// Given a processed operand Op which was promoted to a larger integer type,
+  /// this returns the promoted value. The low bits of the promoted value
+  /// corresponding to the original type are exactly equal to Op.
   /// The extra bits contain rubbish, so the promoted value may need to be zero-
   /// or sign-extended from the original type before it is usable (the helpers
   /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
@@ -218,8 +213,7 @@ private:
   }
   void SetPromotedInteger(SDValue Op, SDValue Result);

-  /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
-  /// final size.
+  /// Get a promoted operand and sign extend it to the final size.
   SDValue SExtPromotedInteger(SDValue Op) {
     EVT OldVT = Op.getValueType();
     SDLoc dl(Op);
@@ -228,8 +222,7 @@ private:
                        DAG.getValueType(OldVT));
   }

-  /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the
-  /// final size.
+  /// Get a promoted operand and zero extend it to the final size.
   SDValue ZExtPromotedInteger(SDValue Op) {
     EVT OldVT = Op.getValueType();
     SDLoc dl(Op);
@@ -322,9 +315,9 @@ private:
   // Integer Expansion Support: LegalizeIntegerTypes.cpp
   //===--------------------------------------------------------------------===//

-  /// GetExpandedInteger - Given a processed operand Op which was expanded into
-  /// two integers of half the size, this returns the two halves. The low bits
-  /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi.
+  /// Given a processed operand Op which was expanded into two integers of half
+  /// the size, this returns the two halves. The low bits of Op are exactly
+  /// equal to the bits of Lo; the high bits exactly equal Hi.
   /// For example, if Op is an i64 which was expanded into two i32's, then this
   /// method returns the two i32's, with Lo being equal to the lower 32 bits of
   /// Op, and Hi being equal to the upper 32 bits.
@@ -362,6 +355,8 @@ private:
   void ExpandIntRes_UREM              (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_Shift             (SDNode *N, SDValue &Lo, SDValue &Hi);

+  void ExpandIntRes_MINMAX            (SDNode *N, SDValue &Lo, SDValue &Hi);
+
   void ExpandIntRes_SADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_UADDSUBO          (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandIntRes_XMULO             (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -388,14 +383,14 @@ private:
   SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N);

   void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
-                                  ISD::CondCode &CCCode, SDLoc dl);
+                                  ISD::CondCode &CCCode, const SDLoc &dl);

   //===--------------------------------------------------------------------===//
   // Float to Integer Conversion Support: LegalizeFloatTypes.cpp
   //===--------------------------------------------------------------------===//

-  /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer
-  /// if the Op is not supported in target HW and converted to the integer.
+  /// Given an operand Op of Float type, returns the integer if the Op is not
+  /// supported in target HW and converted to the integer.
   /// The integer contains exactly the same bits as Op - only the type changed.
   /// For example, if Op is an f32 which was softened to an i32, then this method
   /// returns an i32, the bits of which coincide with those of Op.
@@ -487,8 +482,8 @@ private:
   // Float Expansion Support: LegalizeFloatTypes.cpp
   //===--------------------------------------------------------------------===//

-  /// GetExpandedFloat - Given a processed operand Op which was expanded into
-  /// two floating point values of half the size, this returns the two halves.
+  /// Given a processed operand Op which was expanded into two floating-point
+  /// values of half the size, this returns the two halves.
   /// The low bits of Op are exactly equal to the bits of Lo; the high bits
   /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded
   /// into two f64's, then this method returns the two f64's, with Lo being
@@ -542,8 +537,7 @@ private:
   SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);

   void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
-                                ISD::CondCode &CCCode, SDLoc dl);
-
+                                ISD::CondCode &CCCode, const SDLoc &dl);

   //===--------------------------------------------------------------------===//
   // Float promotion support: LegalizeFloatTypes.cpp
@@ -586,9 +580,9 @@ private:
   // Scalarization Support: LegalizeVectorTypes.cpp
   //===--------------------------------------------------------------------===//

-  /// GetScalarizedVector - Given a processed one-element vector Op which was
-  /// scalarized to its element type, this returns the element. For example,
-  /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32.
+  /// Given a processed one-element vector Op which was scalarized to its
+  /// element type, this returns the element. For example, if Op is a v1i32,
+  /// Op = < i32 val >, this method returns val, an i32.
   SDValue GetScalarizedVector(SDValue Op) {
     SDValue &ScalarizedOp = ScalarizedVectors[Op];
     RemapValue(ScalarizedOp);
@@ -636,12 +630,12 @@ private:
   // Vector Splitting Support: LegalizeVectorTypes.cpp
   //===--------------------------------------------------------------------===//

-  /// GetSplitVector - Given a processed vector Op which was split into vectors
-  /// of half the size, this method returns the halves. The first elements of
-  /// Op coincide with the elements of Lo; the remaining elements of Op coincide
-  /// with the elements of Hi: Op is what you would get by concatenating Lo and
-  /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then
-  /// this method returns the two v4i32's, with Lo corresponding to the first 4
+  /// Given a processed vector Op which was split into vectors of half the size,
+  /// this method returns the halves. The first elements of Op coincide with the
+  /// elements of Lo; the remaining elements of Op coincide with the elements of
+  /// Hi: Op is what you would get by concatenating Lo and Hi.
+  /// For example, if Op is a v8i32 that was split into two v4i32's, then this
+  /// method returns the two v4i32's, with Lo corresponding to the first 4
   /// elements of Op, and Hi to the last 4 elements.
   void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
   void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
@@ -653,6 +647,7 @@ private:
   void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);

   void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -692,12 +687,12 @@ private:
   // Vector Widening Support: LegalizeVectorTypes.cpp
   //===--------------------------------------------------------------------===//

-  /// GetWidenedVector - Given a processed vector Op which was widened into a
-  /// larger vector, this method returns the larger vector. The elements of
-  /// the returned vector consist of the elements of Op followed by elements
-  /// containing rubbish. For example, if Op is a v2i32 that was widened to a
-  /// v4i32, then this method returns a v4i32 for which the first two elements
-  /// are the same as those of Op, while the last two elements contain rubbish.
+  /// Given a processed vector Op which was widened into a larger vector, this
+  /// method returns the larger vector. The elements of the returned vector
+  /// consist of the elements of Op followed by elements containing rubbish.
+  /// For example, if Op is a v2i32 that was widened to a v4i32, then this
+  /// method returns a v4i32 for which the first two elements are the same as
+  /// those of Op, while the last two elements contain rubbish.
   SDValue GetWidenedVector(SDValue Op) {
     SDValue &WidenedOp = WidenedVectors[Op];
     RemapValue(WidenedOp);
@@ -713,6 +708,7 @@ private:
   SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
   SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
   SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+  SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
   SDValue WidenVecRes_LOAD(SDNode* N);
@@ -755,29 +751,29 @@ private:
   // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
   //===--------------------------------------------------------------------===//

-  /// Helper GenWidenVectorLoads - Helper function to generate a set of
-  /// loads to load a vector with a resulting wider type. It takes
+  /// Helper function to generate a set of loads to load a vector with a
+  /// resulting wider type. It takes:
   ///     LdChain: list of chains for the load to be generated.
   ///     Ld:      load to widen
   SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
                               LoadSDNode *LD);

-  /// GenWidenVectorExtLoads - Helper function to generate a set of extension
-  /// loads to load a ector with a resulting wider type. It takes
+  /// Helper function to generate a set of extension loads to load a vector with
+  /// a resulting wider type. It takes:
   ///     LdChain: list of chains for the load to be generated.
   ///     Ld:      load to widen
   ///     ExtType: extension element type
   SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
                                  LoadSDNode *LD, ISD::LoadExtType ExtType);

-  /// Helper genWidenVectorStores - Helper function to generate a set of
-  /// stores to store a widen vector into non-widen memory
+  /// Helper function to generate a set of stores to store a widen vector into
+  /// non-widen memory.
   ///     StChain: list of chains for the stores we have generated
   ///     ST:      store of a widen value
   void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);

-  /// Helper genWidenVectorTruncStores - Helper function to generate a set of
-  /// stores to store a truncate widen vector into non-widen memory
+  /// Helper function to generate a set of stores to store a truncate widen
+  /// vector into non-widen memory.
   ///     StChain: list of chains for the stores we have generated
   ///     ST:      store of a widen value
   void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
@@ -785,8 +781,7 @@ private:

   /// Modifies a vector input (widen or narrows) to a vector of NVT.  The
   /// input vector must have the same element type as NVT.
-  /// When FillWithZeroes is "on" the vector will be widened with
-  /// zeroes.
+  /// When FillWithZeroes is "on" the vector will be widened with zeroes.
   /// By default, the vector will be widened with undefined values.
   SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);

@@ -807,8 +802,8 @@ private:
     GetExpandedFloat(Op, Lo, Hi);
   }

-  /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
-  /// high parts of the given value.
+  /// Use ISD::EXTRACT_ELEMENT nodes to extract the low and high parts of the
+  /// given value.
   void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);

   // Generic Result Splitting.
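[Editor's note] The NodeIdFlags enum documented in the LegalizeTypes.h hunks above encodes a small state machine in each node's id: a nonnegative id counts unprocessed operands, and the negative sentinels mark new, unanalyzed, or finished nodes. A rough, hypothetical C++ sketch of how such a counter can drive a worklist follows; Node and notifyUsers are illustrative stand-ins, not LLVM API:

#include <vector>

enum NodeIdFlags { ReadyToProcess = 0, NewNode = -1, Unanalyzed = -2, Processed = -3 };

struct Node {                      // stand-in for SDNode
  int Id = Unanalyzed;             // >0 means: count of unprocessed operands
  std::vector<Node *> Users;
};

// When N finishes legalization, each user has one fewer unprocessed
// operand; a user whose count reaches ReadyToProcess joins the worklist.
void notifyUsers(Node &N, std::vector<Node *> &Worklist) {
  N.Id = Processed;
  for (Node *U : N.Users)
    if (U->Id > 0 && --U->Id == ReadyToProcess)
      Worklist.push_back(U);
}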
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 593c346..665180e 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -170,12 +170,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);

   // Emit a store to the stack slot.
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
-                               false, false, 0);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo);

   // Load the first half from the stack slot.
-  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo,
-                   false, false, false, 0);
+  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo);

   // Increment the pointer to the other half.
   unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -185,8 +183,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {

   // Load the second half from the stack slot.
   Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
-                   PtrInfo.getWithOffset(IncrementSize), false,
-                   false, false, MinAlign(Alignment, IncrementSize));
+                   PtrInfo.getWithOffset(IncrementSize),
+                   MinAlign(Alignment, IncrementSize));

   // Handle endianness of the load.
   if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout()))
@@ -263,16 +261,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
   SDValue Chain = LD->getChain();
   SDValue Ptr = LD->getBasePtr();
   unsigned Alignment = LD->getAlignment();
-  bool isVolatile = LD->isVolatile();
-  bool isNonTemporal = LD->isNonTemporal();
-  bool isInvariant = LD->isInvariant();
   AAMDNodes AAInfo = LD->getAAInfo();

   assert(NVT.isByteSized() && "Expanded type not byte sized!");

-  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
-                   isVolatile, isNonTemporal, isInvariant, Alignment,
-                   AAInfo);
+  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment,
+                   LD->getMemOperand()->getFlags(), AAInfo);

   // Increment the pointer to the other half.
   unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -280,8 +274,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
                     DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
   Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
                    LD->getPointerInfo().getWithOffset(IncrementSize),
-                   isVolatile, isNonTemporal, isInvariant,
-                   MinAlign(Alignment, IncrementSize), AAInfo);
+                   MinAlign(Alignment, IncrementSize),
+                   LD->getMemOperand()->getFlags(), AAInfo);

   // Build a factor node to remember that this load is independent of the
   // other one.
@@ -478,8 +472,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
   SDValue Chain = St->getChain();
   SDValue Ptr = St->getBasePtr();
   unsigned Alignment = St->getAlignment();
-  bool isVolatile = St->isVolatile();
-  bool isNonTemporal = St->isNonTemporal();
   AAMDNodes AAInfo = St->getAAInfo();

   assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -491,15 +483,15 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
   if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
     std::swap(Lo, Hi);

-  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
-                    isVolatile, isNonTemporal, Alignment, AAInfo);
+  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment,
+                    St->getMemOperand()->getFlags(), AAInfo);

   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                     DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
   Hi = DAG.getStore(Chain, dl, Hi, Ptr,
                     St->getPointerInfo().getWithOffset(IncrementSize),
-                    isVolatile, isNonTemporal,
-                    MinAlign(Alignment, IncrementSize), AAInfo);
+                    MinAlign(Alignment, IncrementSize),
+                    St->getMemOperand()->getFlags(), AAInfo);

   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
 }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f61f631..3c9cb17 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -358,8 +358,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case TargetLowering::Legal:
     break;
   case TargetLowering::Custom: {
-    SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
-    if (Tmp1.getNode()) {
+    if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
       Result = Tmp1;
       break;
     }
@@ -493,21 +492,26 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {


 SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
-  SDLoc dl(Op);
   LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
-  SDValue Chain = LD->getChain();
-  SDValue BasePTR = LD->getBasePtr();
-  EVT SrcVT = LD->getMemoryVT();
-  ISD::LoadExtType ExtType = LD->getExtensionType();

-  SmallVector<SDValue, 8> Vals;
-  SmallVector<SDValue, 8> LoadChains;
+  EVT SrcVT = LD->getMemoryVT();
+  EVT SrcEltVT = SrcVT.getScalarType();
   unsigned NumElem = SrcVT.getVectorNumElements();

-  EVT SrcEltVT = SrcVT.getScalarType();
-  EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
+  SDValue NewChain;
+  SDValue Value;
   if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+    SDLoc dl(Op);
+
+    SmallVector<SDValue, 8> Vals;
+    SmallVector<SDValue, 8> LoadChains;
+
+    EVT DstEltVT = LD->getValueType(0).getScalarType();
+    SDValue Chain = LD->getChain();
+    SDValue BasePTR = LD->getBasePtr();
+    ISD::LoadExtType ExtType = LD->getExtensionType();
+
     // When elements in a vector is not byte-addressable, we cannot directly
     // load each element by advancing pointer, which could only address bytes.
     // Instead, we load all significant words, mask bits off, and concatenate
@@ -531,24 +535,22 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
       unsigned LoadBytes = WideBytes;

       if (RemainingBytes >= LoadBytes) {
-        ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
-                                 LD->getPointerInfo().getWithOffset(Offset),
-                                 LD->isVolatile(), LD->isNonTemporal(),
-                                 LD->isInvariant(),
-                                 MinAlign(LD->getAlignment(), Offset),
-                                 LD->getAAInfo());
+        ScalarLoad =
+            DAG.getLoad(WideVT, dl, Chain, BasePTR,
+                        LD->getPointerInfo().getWithOffset(Offset),
+                        MinAlign(LD->getAlignment(), Offset),
+                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
       } else {
         EVT LoadVT = WideVT;
         while (RemainingBytes < LoadBytes) {
           LoadBytes >>= 1; // Reduce the load size by half.
           LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
         }
-        ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
-                                    LD->getPointerInfo().getWithOffset(Offset),
-                                    LoadVT, LD->isVolatile(),
-                                    LD->isNonTemporal(), LD->isInvariant(),
-                                    MinAlign(LD->getAlignment(), Offset),
-                                    LD->getAAInfo());
+        ScalarLoad =
+            DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
+                           LD->getPointerInfo().getWithOffset(Offset), LoadVT,
+                           MinAlign(LD->getAlignment(), Offset),
+                           LD->getMemOperand()->getFlags(), LD->getAAInfo());
       }

       RemainingBytes -= LoadBytes;
@@ -614,29 +616,17 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
       }
       Vals.push_back(Lo);
     }
-  } else {
-    unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
-
-    for (unsigned Idx=0; Idx<NumElem; Idx++) {
-      SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
-                Op.getNode()->getValueType(0).getScalarType(),
-                Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
-                SrcVT.getScalarType(),
-                LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
-                MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());
-
-      BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
-                            DAG.getConstant(Stride, dl, BasePTR.getValueType()));
+    NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+    Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                        Op.getNode()->getValueType(0), Vals);
+  } else {
+    SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);

-      Vals.push_back(ScalarLoad.getValue(0));
-      LoadChains.push_back(ScalarLoad.getValue(1));
-    }
+    NewChain = Scalarized.getValue(1);
+    Value = Scalarized.getValue(0);
   }

-  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
-  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
-                              Op.getNode()->getValueType(0), Vals);
-
   AddLegalizedOperand(Op.getValue(0), Value);
   AddLegalizedOperand(Op.getValue(1), NewChain);

@@ -644,54 +634,37 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
 }

 SDValue VectorLegalizer::ExpandStore(SDValue Op) {
-  SDLoc dl(Op);
   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
-  SDValue Chain = ST->getChain();
-  SDValue BasePTR = ST->getBasePtr();
-  SDValue Value = ST->getValue();
-  EVT StVT = ST->getMemoryVT();

-  unsigned Alignment = ST->getAlignment();
-  bool isVolatile = ST->isVolatile();
-  bool isNonTemporal = ST->isNonTemporal();
-  AAMDNodes AAInfo = ST->getAAInfo();
-  unsigned NumElem = StVT.getVectorNumElements();

-  // The type of the data we want to save
-  EVT RegVT = Value.getValueType();
-  EVT RegSclVT = RegVT.getScalarType();
-  // The type of data as saved in memory.
+  EVT StVT = ST->getMemoryVT();
   EVT MemSclVT = StVT.getScalarType();
-
-  // Cast floats into integers
   unsigned ScalarSize = MemSclVT.getSizeInBits();

   // Round odd types to the next pow of two.
-  if (!isPowerOf2_32(ScalarSize))
-    ScalarSize = NextPowerOf2(ScalarSize);
-
-  // Store Stride in bytes
-  unsigned Stride = ScalarSize/8;
-
-  // Extract each of the elements from the original vector
-  // and save them into memory individually.
-  SmallVector<SDValue, 8> Stores;
-  for (unsigned Idx = 0; Idx < NumElem; Idx++) {
-    SDValue Ex = DAG.getNode(
-        ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value,
-        DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
-
-    // This scalar TruncStore may be illegal, but we legalize it later.
-    SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
-               ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
-               isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride),
-               AAInfo);
-
-    BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
-                          DAG.getConstant(Stride, dl, BasePTR.getValueType()));
-
-    Stores.push_back(Store);
+  if (!isPowerOf2_32(ScalarSize)) {
+    // FIXME: This is completely broken and inconsistent with ExpandLoad
+    // handling.
+
+    // For sub-byte element sizes, this ends up with 0 stride between elements,
+    // so the same element just gets re-written to the same location. There seem
+    // to be tests explicitly testing for this broken behavior though.
+
+    LLVMContext &Ctx = *DAG.getContext();
+
+    EVT NewMemVT
+      = EVT::getVectorVT(Ctx,
+                         MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)),
+                         StVT.getVectorNumElements());
+
+    SDValue NewVectorStore = DAG.getTruncStore(
+        ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(),
+        ST->getPointerInfo(), NewMemVT, ST->getAlignment(),
+        ST->getMemOperand()->getFlags(), ST->getAAInfo());
+    ST = cast<StoreSDNode>(NewVectorStore.getNode());
   }
-  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+  SDValue TF = TLI.scalarizeVectorStore(ST, DAG);

   AddLegalizedOperand(Op, TF);
   return TF;
 }
@@ -864,10 +837,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
   int NumSrcElements = SrcVT.getVectorNumElements();

   // Build up a zero vector to blend into this one.
-  EVT SrcScalarVT = SrcVT.getScalarType();
-  SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT);
-  SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero);
-  SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands);
+  SDValue Zero = DAG.getConstant(0, DL, SrcVT);

   // Shuffle the incoming lanes into the correct position, and pull all other
   // lanes from the zero vector.
@@ -885,16 +855,19 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
                      DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
 }

-SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
-  EVT VT = Op.getValueType();
-
-  // Generate a byte wise shuffle mask for the BSWAP.
-  SmallVector<int, 16> ShuffleMask;
+static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
   for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
     for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
+}

+SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
+  EVT VT = Op.getValueType();
+
+  // Generate a byte wise shuffle mask for the BSWAP.
+  SmallVector<int, 16> ShuffleMask;
+  createBSWAPShuffleMask(VT, ShuffleMask);
   EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());

   // Only emit a shuffle if the mask is legal.
@@ -903,8 +876,7 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {

   SDLoc DL(Op);
   Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
-  Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
-                            ShuffleMask.data());
+  Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
 }

@@ -915,12 +887,36 @@ SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
   if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
     return DAG.UnrollVectorOp(Op.getNode());

+  // If the vector element width is a whole number of bytes, test if its legal
+  // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
+  // vector. This greatly reduces the number of bit shifts necessary.
+  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
+  if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
+    SmallVector<int, 16> BSWAPMask;
+    createBSWAPShuffleMask(VT, BSWAPMask);
+
+    EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
+    if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
+        (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
+         (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
+          TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
+          TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
+          TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
+      SDLoc DL(Op);
+      Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+      Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
+                                BSWAPMask);
+      Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
+      return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+    }
+  }
+
   // If we have the appropriate vector bit operations, it is better to use them
   // than unrolling and expanding each component.
   if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
       !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
-      !TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
-      !TLI.isOperationLegalOrCustom(ISD::OR, VT))
+      !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+      !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
     return DAG.UnrollVectorOp(Op.getNode());

   // Let LegalizeDAG handle this later.
@@ -1027,10 +1023,12 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
 }

 SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) {
-  // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle.
+  // If the non-ZERO_UNDEF version is supported we can use that instead.
   unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ;
-  if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType()))
-    return Op;
+  if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) {
+    SDLoc DL(Op);
+    return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0));
+  }

   // Otherwise go ahead and unroll.
   return DAG.UnrollVectorOp(Op.getNode());
 }
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d0187d3..f3adca4 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -223,17 +223,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {

 SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
   assert(N->isUnindexed() && "Indexed vector load?");

-  SDValue Result = DAG.getLoad(ISD::UNINDEXED,
-                               N->getExtensionType(),
-                               N->getValueType(0).getVectorElementType(),
-                               SDLoc(N),
-                               N->getChain(), N->getBasePtr(),
-                               DAG.getUNDEF(N->getBasePtr().getValueType()),
-                               N->getPointerInfo(),
-                               N->getMemoryVT().getVectorElementType(),
-                               N->isVolatile(), N->isNonTemporal(),
-                               N->isInvariant(), N->getOriginalAlignment(),
-                               N->getAAInfo());
+  SDValue Result = DAG.getLoad(
+      ISD::UNINDEXED, N->getExtensionType(),
+      N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
+      N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
+      N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
+      N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
+      N->getAAInfo());

   // Legalize the chain result - switch anything that used the old chain to
   // use the new one.
@@ -370,7 +366,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
 SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
   // Figure out if the scalar is the LHS or RHS and return it.
   SDValue Arg = N->getOperand(2).getOperand(0);
-  if (Arg.getOpcode() == ISD::UNDEF)
+  if (Arg.isUndef())
     return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
   unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
   return GetScalarizedVector(N->getOperand(Op));
@@ -476,16 +472,16 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
   return false;
 }

-/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
-/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+/// If the value to convert is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
   SDValue Elt = GetScalarizedVector(N->getOperand(0));
   return DAG.getNode(ISD::BITCAST, SDLoc(N),
                      N->getValueType(0), Elt);
 }

-/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be
-/// scalarized, it must be <1 x ty>. Do the operation on the element instead.
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the operation on the element instead.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
   assert(N->getValueType(0).getVectorNumElements() == 1 &&
          "Unexpected vector type!");
@@ -497,8 +493,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
   return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op);
 }

-/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
-/// use a BUILD_VECTOR instead.
+/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
   SmallVector<SDValue, 8> Ops(N->getNumOperands());
   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
@@ -506,9 +501,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
   return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops);
 }

-/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
-/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
-/// index.
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
+/// so just return the element, ignoring the index.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   SDValue Res = GetScalarizedVector(N->getOperand(0));
   if (Res.getValueType() != N->getValueType(0))
@@ -518,8 +512,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
 }


-/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be
-/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT
+/// If the input condition is a vector that needs to be scalarized, it must be
+/// <1 x i1>, so just convert to a normal ISD::SELECT
 /// (still with vector output type since that was acceptable if we got here).
 SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
   SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
@@ -529,29 +523,28 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
                      N->getOperand(2));
 }

-/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
-/// scalarized, it must be <1 x ty>. Just store the element.
+/// If the value to store is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Just store the element.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
   assert(N->isUnindexed() && "Indexed store of one-element vector?");
   assert(OpNo == 1 && "Do not know how to scalarize this operand!");
   SDLoc dl(N);

   if (N->isTruncatingStore())
-    return DAG.getTruncStore(N->getChain(), dl,
-                             GetScalarizedVector(N->getOperand(1)),
-                             N->getBasePtr(), N->getPointerInfo(),
-                             N->getMemoryVT().getVectorElementType(),
-                             N->isVolatile(), N->isNonTemporal(),
-                             N->getAlignment(), N->getAAInfo());
+    return DAG.getTruncStore(
+        N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+        N->getBasePtr(), N->getPointerInfo(),
+        N->getMemoryVT().getVectorElementType(), N->getAlignment(),
+        N->getMemOperand()->getFlags(), N->getAAInfo());

   return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
                       N->getBasePtr(), N->getPointerInfo(),
-                      N->isVolatile(), N->isNonTemporal(),
-                      N->getOriginalAlignment(), N->getAAInfo());
+                      N->getOriginalAlignment(), N->getMemOperand()->getFlags(),
+                      N->getAAInfo());
 }

-/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs
-/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+/// If the value to round is a vector that needs to be scalarized, it must be
+/// <1 x ty>. Convert the element instead.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
   SDValue Elt = GetScalarizedVector(N->getOperand(0));
   SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
@@ -564,11 +557,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
 // Result Vector Splitting
 //===----------------------------------------------------------------------===//

-/// SplitVectorResult - This method is called when the specified result of the
-/// specified node is found to need vector splitting. At this point, the node
-/// may also have invalid operands or may have other results that need
-/// legalization, we just know that (at least) one result needs vector
-/// splitting.
+/// This method is called when the specified result of the specified node is
+/// found to need vector splitting. At this point, the node may also have
+/// invalid operands or may have other results that need legalization, we just
+/// know that (at least) one result needs vector splitting.
 void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   DEBUG(dbgs() << "Split node result: ";
         N->dump(&DAG);
@@ -621,6 +613,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
     SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
     break;

+  case ISD::ANY_EXTEND_VECTOR_INREG:
+  case ISD::SIGN_EXTEND_VECTOR_INREG:
+  case ISD::ZERO_EXTEND_VECTOR_INREG:
+    SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
+    break;
+
   case ISD::BITREVERSE:
   case ISD::BSWAP:
   case ISD::CONVERT_RNDSAT:
@@ -664,6 +662,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::ADD:
   case ISD::SUB:
   case ISD::MUL:
+  case ISD::MULHS:
+  case ISD::MULHU:
   case ISD::FADD:
   case ISD::FSUB:
   case ISD::FMUL:
@@ -845,23 +845,41 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
   SDLoc dl(N);
   GetSplitVector(Vec, Lo, Hi);

-  // Spill the vector to the stack.
   EVT VecVT = Vec.getValueType();
-  EVT SubVecVT = VecVT.getVectorElementType();
+  EVT VecElemVT = VecVT.getVectorElementType();
+  unsigned VecElems = VecVT.getVectorNumElements();
+  unsigned SubElems = SubVec.getValueType().getVectorNumElements();
+
+  // If we know the index is 0, and we know the subvector doesn't cross the
+  // boundary between the halves, we can avoid spilling the vector, and insert
+  // into the lower half of the split vector directly.
+  // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever
+  // the index is constant and there is no boundary crossing. But those cases
+  // don't seem to get hit in practice.
+  if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) {
+    unsigned IdxVal = ConstIdx->getZExtValue();
+    if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
+      EVT LoVT, HiVT;
+      std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+      Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
+      return;
+    }
+  }
+
+  // Spill the vector to the stack.
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
-                               MachinePointerInfo(), false, false, 0);
+  SDValue Store =
+      DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());

   // Store the new subvector into the specified index.
-  SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx);
+  SDValue SubVecPtr = GetVectorElementPointer(StackPtr, VecElemVT, Idx);
   Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
   unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
-  Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(),
-                       false, false, 0);
+  Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo());

   // Load the Lo part from the stack slot.
-  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
-                   false, false, false, 0);
+  Lo =
+      DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());

   // Increment the pointer to the other part.
   unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
@@ -871,7 +889,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,

   // Load the Hi part from the stack slot.
   Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
-                   false, false, false, MinAlign(Alignment, IncrementSize));
+                   MinAlign(Alignment, IncrementSize));
 }

 void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
@@ -917,6 +935,39 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
                    DAG.getValueType(HiVT));
 }

+void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
+                                                 SDValue &Hi) {
+  unsigned Opcode = N->getOpcode();
+  SDValue N0 = N->getOperand(0);
+
+  SDLoc dl(N);
+  SDValue InLo, InHi;
+  GetSplitVector(N0, InLo, InHi);
+  EVT InLoVT = InLo.getValueType();
+  unsigned InNumElements = InLoVT.getVectorNumElements();
+
+  EVT OutLoVT, OutHiVT;
+  std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  unsigned OutNumElements = OutLoVT.getVectorNumElements();
+  assert((2 * OutNumElements) <= InNumElements &&
+         "Illegal extend vector in reg split");
+
+  // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the
+  // input vector (i.e. we only use InLo):
+  // OutLo will extend the first OutNumElements from InLo.
+  // OutHi will extend the next OutNumElements from InLo.
+
+  // Shuffle the elements from InLo for OutHi into the bottom elements to
+  // create a 'fake' InHi.
+  SmallVector<int, 8> SplitHi(InNumElements, -1);
+  for (unsigned i = 0; i != OutNumElements; ++i)
+    SplitHi[i] = i + OutNumElements;
+  InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi);
+
+  Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo);
+  Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
+}
+
 void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
                                                      SDValue &Hi) {
   SDValue Vec = N->getOperand(0);
@@ -947,20 +998,20 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
   EVT VecVT = Vec.getValueType();
   EVT EltVT = VecVT.getVectorElementType();
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
-  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
-                               MachinePointerInfo(), false, false, 0);
+  SDValue Store =
+      DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());

   // Store the new element.  This may be larger than the vector element type,
   // so use a truncating store.
   SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
   Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
   unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
-  Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
-                            false, false, 0);
+  Store =
+      DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT);

   // Load the Lo part from the stack slot.
-  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
-                   false, false, false, 0);
+  Lo =
+      DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());

   // Increment the pointer to the other part.
   unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
@@ -970,7 +1021,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,

   // Load the Hi part from the stack slot.
   Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
-                   false, false, false, MinAlign(Alignment, IncrementSize));
+                   MinAlign(Alignment, IncrementSize));
 }

 void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -995,25 +1046,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
   SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
   EVT MemoryVT = LD->getMemoryVT();
   unsigned Alignment = LD->getOriginalAlignment();
-  bool isVolatile = LD->isVolatile();
-  bool isNonTemporal = LD->isNonTemporal();
-  bool isInvariant = LD->isInvariant();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
   AAMDNodes AAInfo = LD->getAAInfo();

   EVT LoMemVT, HiMemVT;
   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

   Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
-                   LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
-                   isInvariant, Alignment, AAInfo);
+                   LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);

   unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                     DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
   Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
-                   LD->getPointerInfo().getWithOffset(IncrementSize),
-                   HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment,
-                   AAInfo);
+                   LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
+                   Alignment, MMOFlags, AAInfo);

   // Build a factor node to remember that this load is independent of the
   // other one.
@@ -1062,7 +1109,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);

   MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MLD->getPointerInfo(), 
+    getMachineMemOperand(MLD->getPointerInfo(),
                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
                          Alignment, MLD->getAAInfo(), MLD->getRanges());

@@ -1074,7 +1121,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
                     DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));

   MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MLD->getPointerInfo(), 
+    getMachineMemOperand(MLD->getPointerInfo(),
                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());

@@ -1131,7 +1178,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);

   MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(), + getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); @@ -1362,7 +1409,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue Op1 = InputUsed[1] == -1U ? DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; // At least one input vector was used. Create a new shuffle vector. - Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, Ops); } Ops.clear(); @@ -1374,10 +1421,10 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Operand Vector Splitting //===----------------------------------------------------------------------===// -/// SplitVectorOperand - This method is called when the specified operand of the -/// specified node is found to need vector splitting. At this point, all of the -/// result types of the node are known to be legal, but other operands of the -/// node may need legalization as well as the specified one. +/// This method is called when the specified operand of the specified node is +/// found to need vector splitting. At this point, all of the result types of +/// the node are known to be legal, but other operands of the node may need +/// legalization as well as the specified one. bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); @@ -1600,13 +1647,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Store the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); + SDValue Store = + DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo(), EltVT, false, false, false, 0); + MachinePointerInfo(), EltVT); } SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, @@ -1646,7 +1693,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), + getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); @@ -1655,7 +1702,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, OpsLo, MMO); MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), + getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); @@ -1688,7 +1735,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); SDLoc DL(N); - + EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); @@ -1717,7 +1764,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(N->getPointerInfo(), + getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); @@ -1729,7 +1776,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), + getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(), N->getRanges()); @@ -1778,7 +1825,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), + getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); @@ -1787,7 +1834,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, DL, OpsLo, MMO); MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), + getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); @@ -1810,8 +1857,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue Ptr = N->getBasePtr(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); - bool isVol = N->isVolatile(); - bool isNT = N->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); @@ -1822,11 +1868,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) - Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - LoMemVT, isVol, isNT, Alignment, AAInfo); + Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, + Alignment, MMOFlags, AAInfo); else - Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - isVol, isNT, Alignment, AAInfo); + Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, + AAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -1835,11 +1881,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVol, isNT, Alignment, AAInfo); + HiMemVT, Alignment, MMOFlags, AAInfo); else Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - isVol, isNT, Alignment, AAInfo); + Alignment, MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } @@ -1889,7 +1935,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { EVT OutVT = N->getValueType(0); unsigned NumElements = OutVT.getVectorNumElements(); bool IsFloat = OutVT.isFloatingPoint(); - + // Widening should have already made sure this is a power-two vector // if we're trying to split it at all. assert() that's true, just in case. 
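[Note: a recurring change in these store/load hunks replaces the positional isVolatile/isNonTemporal/isInvariant booleans with one MachineMemOperand::Flags word copied from the node's existing memory operand. A toy model of why the single flags word is the safer interface; the names MemFlags, emitStoreOld, and emitStoreNew are invented for illustration.]

    #include <cassert>
    #include <cstdio>

    // Three positional bools are collapsed into one flags word that can be
    // copied wholesale from an existing memory operand.
    enum MemFlags : unsigned {
      MONone = 0,
      MOVolatile = 1u << 0,
      MONonTemporal = 1u << 1,
      MOInvariant = 1u << 2,
    };

    // Old style: easy to pass the bools in the wrong order.
    static void emitStoreOld(bool isVolatile, bool isNonTemporal) {
      std::printf("old: vol=%d nt=%d\n", isVolatile, isNonTemporal);
    }

    // New style: one word, forwarded from the original node's flags.
    static void emitStoreNew(unsigned MMOFlags) {
      std::printf("new: vol=%d nt=%d\n", bool(MMOFlags & MOVolatile),
                  bool(MMOFlags & MONonTemporal));
    }

    int main() {
      unsigned Flags = MOVolatile | MOInvariant; // as read off the old node
      emitStoreOld(Flags & MOVolatile, Flags & MONonTemporal);
      emitStoreNew(Flags); // splitting code now just forwards Flags
      assert((Flags & MONonTemporal) == 0);
      return 0;
    }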
assert(!(NumElements & 1) && "Splitting vector, but not in half!"); @@ -2069,6 +2115,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Shift(N); break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + Res = WidenVecRes_EXTEND_VECTOR_INREG(N); + break; + case ISD::ANY_EXTEND: case ISD::FP_EXTEND: case ISD::FP_ROUND: @@ -2355,6 +2407,61 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { + unsigned Opcode = N->getOpcode(); + SDValue InOp = N->getOperand(0); + SDLoc DL(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenSVT = WidenVT.getVectorElementType(); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + EVT InVT = InOp.getValueType(); + EVT InSVT = InVT.getVectorElementType(); + unsigned InVTNumElts = InVT.getVectorNumElements(); + + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) { + switch (Opcode) { + case ISD::ANY_EXTEND_VECTOR_INREG: + return DAG.getAnyExtendVectorInReg(InOp, DL, WidenVT); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT); + } + } + } + + // Unroll, extend the scalars and rebuild the vector. + SmallVector<SDValue, 16> Ops; + for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) { + SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + switch (Opcode) { + case ISD::ANY_EXTEND_VECTOR_INREG: + Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val); + break; + case ISD::SIGN_EXTEND_VECTOR_INREG: + Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val); + break; + case ISD::ZERO_EXTEND_VECTOR_INREG: + Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val); + break; + default: + llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected"); + } + Ops.push_back(Val); + } + + while (Ops.size() != WidenNumElts) + Ops.push_back(DAG.getUNDEF(WidenSVT)); + + return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); +} + SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { // If this is an FCOPYSIGN with same input types, we can treat it as a // normal (can trap) binary op. @@ -2546,7 +2653,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { // The inputs and the result are widen to the same value. 
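[Note: the unroll path of the new WidenVecRes_EXTEND_VECTOR_INREG above extends min(InVTNumElts, WidenNumElts) scalars one at a time and pads the tail with undef lanes. A minimal standalone re-creation of that loop; a sentinel value stands in for DAG.getUNDEF and sign-extension for the opcode switch.]

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      const std::vector<short> In = {1, -2, 3};   // 3 input lanes
      const unsigned WidenNumElts = 8;            // widened result has 8 lanes
      const int Undef = 0;                        // stand-in for DAG.getUNDEF

      std::vector<int> Ops;
      for (size_t i = 0, e = std::min<size_t>(In.size(), WidenNumElts);
           i != e; ++i)
        Ops.push_back(static_cast<int>(In[i]));   // SIGN_EXTEND of lane i

      while (Ops.size() != WidenNumElts)          // fill the rest with undefs
        Ops.push_back(Undef);

      assert(Ops.size() == 8 && Ops[1] == -2 && Ops[3] == Undef);
      return 0;
    }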
unsigned i; for (i=1; i < NumOperands; ++i) - if (N->getOperand(i).getOpcode() != ISD::UNDEF) + if (!N->getOperand(i).isUndef()) break; if (i == NumOperands) @@ -2564,7 +2671,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { return DAG.getVectorShuffle(WidenVT, dl, GetWidenedVector(N->getOperand(0)), GetWidenedVector(N->getOperand(1)), - &MaskOps[0]); + MaskOps); } } } @@ -2744,7 +2851,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { - + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); SDValue Mask = N->getMask(); EVT MaskVT = Mask.getValueType(); @@ -2898,7 +3005,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { } for (unsigned i = NumElts; i != WidenNumElts; ++i) NewMask.push_back(-1); - return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]); + return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask); } SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { @@ -3072,9 +3179,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { - // Since the result is legal and the input is illegal, it is unlikely - // that we can fix the input to a legal type so unroll the convert - // into some scalar code and create a nasty build vector. + // Since the result is legal and the input is illegal, it is unlikely that we + // can fix the input to a legal type so unroll the convert into some scalar + // code and create a nasty build vector. EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDLoc dl(N); @@ -3161,7 +3268,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { - // We have to widen the value but we want only to store the original + // We have to widen the value, but we want only to store the original // vector type. StoreSDNode *ST = cast<StoreSDNode>(N); @@ -3189,10 +3296,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) Mask = GetWidenedVector(Mask); else { - // The mask should be widened as well + // The mask should be widened as well. EVT BoolVT = getSetCCResultType(WideVal.getValueType()); // We can't use ModifyToType() because we should fill the mask with - // zeroes + // zeroes. unsigned WidenNumElts = BoolVT.getVectorNumElements(); unsigned MaskNumElts = MaskVT.getVectorNumElements(); @@ -3219,16 +3326,16 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { SDValue DataOp = MSC->getValue(); SDValue Mask = MSC->getMask(); - // Widen the value + // Widen the value. SDValue WideVal = GetWidenedVector(DataOp); EVT WideVT = WideVal.getValueType(); unsigned NumElts = WideVal.getValueType().getVectorNumElements(); SDLoc dl(N); - // The mask should be widened as well + // The mask should be widened as well. Mask = WidenTargetBoolean(Mask, WideVT, true); - // Widen index + // Widen index. SDValue Index = MSC->getIndex(); EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), Index.getValueType().getScalarType(), @@ -3293,7 +3400,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, if (Width == WidenEltWidth) return RetVT; - // See if there is larger legal integer than the element type to load/store + // See if there is larger legal integer than the element type to load/store. 
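[Note: the FindMemType loop that follows walks MVT's integer types from widest to narrowest, taking the first legal type that still fits. A minimal standalone model of that search, with a hard-coded set of widths in place of TLI legality queries; the name findMemWidth is invented.]

    #include <cassert>
    #include <vector>

    // Walk the integer widths from widest to narrowest and take the first
    // "legal" one that still fits in the number of bits left to transfer.
    static unsigned findMemWidth(unsigned BitsLeft) {
      const std::vector<unsigned> DescendingWidths = {128, 64, 32, 16, 8};
      for (unsigned W : DescendingWidths)
        if (W <= BitsLeft)
          return W; // first (widest) fit wins
      return 0;     // nothing narrow enough
    }

    int main() {
      // Chopping a 96-bit value: one 64-bit piece, then one 32-bit piece.
      assert(findMemWidth(96) == 64);
      assert(findMemWidth(96 - 64) == 32);
      assert(findMemWidth(4) == 0); // sub-byte remainders need other handling
      return 0;
    }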
unsigned VT; for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { @@ -3355,7 +3462,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, NumElts = Width / NewLdTy.getSizeInBits(); NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); - // Readjust position and vector position based on new load type + // Readjust position and vector position based on new load type. Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); LdTy = NewLdTy; } @@ -3368,8 +3475,8 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD) { - // The strategy assumes that we can efficiently load powers of two widths. - // The routines chops the vector into the largest vector loads with the same + // The strategy assumes that we can efficiently load power-of-two widths. + // The routine chops the vector into the largest vector loads with the same // element type or scalar loads and then recombines it to the widen vector // type. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); @@ -3380,27 +3487,24 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); // Load information - SDValue Chain = LD->getChain(); - SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); int LdWidth = LdVT.getSizeInBits(); - int WidthDiff = WidenWidth - LdWidth; // Difference - unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads + int WidthDiff = WidenWidth - LdWidth; + unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads. // Find the vector type that can load from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Align, - AAInfo); + Align, MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); - // Check if we can load the element with one instruction + // Check if we can load the element with one instruction. if (LdWidth <= NewVTWidth) { if (!NewVT.isVector()) { unsigned NumElts = WidenWidth / NewVTWidth; @@ -3421,7 +3525,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); } - // Load vector by using multiple loads from largest vector to scalar + // Load vector by using multiple loads from largest vector to scalar. SmallVector<SDValue, 16> LdOps; LdOps.push_back(LdOp); @@ -3436,13 +3540,12 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, SDValue L; if (LdWidth < NewVTWidth) { - // Our current type we are using is too large, find a better size + // The current type we are using is too large. Find a better size. 
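[Note: the GenWidenVectorLoads loop above keeps issuing the widest load that fits, advancing the byte offset by the bytes consumed, until the whole width is covered. A standalone walk-through of that accounting, assuming all widths are byte multiples; pickWidth is a stand-in for FindMemType.]

    #include <cassert>
    #include <utility>
    #include <vector>

    static unsigned pickWidth(unsigned BitsLeft) {
      for (unsigned W : {128u, 64u, 32u, 16u, 8u})
        if (W <= BitsLeft)
          return W;
      return 0;
    }

    int main() {
      unsigned LdWidth = 224; // e.g. a 7 x i32 load being widened
      unsigned Offset = 0;    // byte offset from the base pointer
      std::vector<std::pair<unsigned, unsigned>> Chunks; // (offset, bits)

      while (LdWidth != 0) {
        unsigned W = pickWidth(LdWidth);
        Chunks.push_back({Offset, W});
        Offset += W / 8; // the Increment, in bytes
        LdWidth -= W;
      }

      // 224 bits -> 128 @ 0, 64 @ 16, 32 @ 24: three loads, largest first.
      assert(Chunks.size() == 3 && Chunks[1].first == 16 &&
             Chunks[2].second == 32);
      return 0;
    }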
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment), - AAInfo); + LD->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector<SDValue, 16> Loads; @@ -3456,9 +3559,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment), - AAInfo); + LD->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Increment), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); } @@ -3468,33 +3570,33 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, LdWidth -= NewVTWidth; } - // Build the vector from the loads operations + // Build the vector from the load operations. unsigned End = LdOps.size(); if (!LdOps[0].getValueType().isVector()) // All the loads are scalar loads. return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); // If the load contains vectors, build the vector using concat vector. - // All of the vectors used to loads are power of 2 and the scalars load - // can be combined to make a power of 2 vector. + // All of the vectors used to load are power-of-2, and the scalar loads can be + // combined to make a power-of-2 vector. SmallVector<SDValue, 16> ConcatOps(End); int i = End - 1; int Idx = End; EVT LdTy = LdOps[i].getValueType(); - // First combine the scalar loads to a vector + // First, combine the scalar loads to a vector. if (!LdTy.isVector()) { for (--i; i >= 0; --i) { LdTy = LdOps[i].getValueType(); if (LdTy.isVector()) break; } - ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); + ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End); } ConcatOps[--Idx] = LdOps[i]; for (--i; i >= 0; --i) { EVT NewLdTy = LdOps[i].getValueType(); if (NewLdTy != LdTy) { - // Create a larger vector + // Create a larger vector. ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, makeArrayRef(&ConcatOps[Idx], End - Idx)); Idx = End - 1; @@ -3503,11 +3605,11 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, ConcatOps[--Idx] = LdOps[i]; } - if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) + if (WidenWidth == LdTy.getSizeInBits() * (End - Idx)) return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, makeArrayRef(&ConcatOps[Idx], End - Idx)); - // We need to fill the rest with undefs to build the vector + // We need to fill the rest with undefs to build the vector. unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); SmallVector<SDValue, 16> WidenOps(NumOps); SDValue UndefVal = DAG.getUNDEF(LdTy); @@ -3526,33 +3628,30 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, LoadSDNode *LD, ISD::LoadExtType ExtType) { // For extension loads, it may not be more efficient to chop up the vector - // and then extended it. Instead, we unroll the load and build a new vector. + // and then extend it. Instead, we unroll the load and build a new vector. 
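[Note: GenWidenVectorExtLoads, whose body follows, avoids the chop-and-extend route entirely: it loads each narrow element separately at base + i * stride, extends it, and pads the widened tail with undef. A self-contained sketch of that unrolled pattern; the in-memory array and the 0-as-undef sentinel are illustrative only.]

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      const int8_t Memory[] = {5, -6, 7, 8};        // 4 x i8 in memory
      const unsigned NumElts = 4, WidenNumElts = 8; // widen v4i32 -> v8i32
      const unsigned Increment = sizeof(int8_t);    // bytes between elements
      const int32_t Undef = 0;                      // stand-in for undef lanes

      std::vector<int32_t> Ops(WidenNumElts, Undef);
      const int8_t *BasePtr = Memory;
      for (unsigned i = 0, Offset = 0; i != NumElts; ++i, Offset += Increment)
        Ops[i] = static_cast<int32_t>(*(BasePtr + Offset)); // ext load of one elt

      assert(Ops[1] == -6 && Ops[5] == Undef);
      return 0;
    }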
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); EVT LdVT = LD->getMemoryVT(); SDLoc dl(LD); assert(LdVT.isVector() && WidenVT.isVector()); // Load information - SDValue Chain = LD->getChain(); - SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); unsigned NumElts = LdVT.getVectorNumElements(); - // Load each element and widen + // Load each element and widen. unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, - LD->getPointerInfo(), - LdEltVT, isVolatile, isNonTemporal, isInvariant, - Align, AAInfo); + Ops[0] = + DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), + LdEltVT, Align, MMOFlags, AAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { @@ -3562,12 +3661,11 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - isVolatile, isNonTemporal, isInvariant, Align, - AAInfo); + Align, MMOFlags, AAInfo); LdChain.push_back(Ops[i].getValue(1)); } - // Fill the rest with undefs + // Fill the rest with undefs. SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i != WidenNumElts; ++i) Ops[i] = UndefVal; @@ -3578,14 +3676,13 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { - // The strategy assumes that we can efficiently store powers of two widths. - // The routines chops the vector into the largest vector stores with the same + // The strategy assumes that we can efficiently store power-of-two widths. + // The routine chops the vector into the largest vector stores with the same // element type or scalar stores. SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); unsigned Align = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -3601,7 +3698,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, int Idx = 0; // current index to store unsigned Offset = 0; // offset from base to store while (StWidth != 0) { - // Find the largest vector type we can store with + // Find the largest vector type we can store with. 
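[Note: the scalar fallback of GenWidenVectorStores below bitcasts the value to a narrower element type and rescales the running lane index with Idx * OldBits / NewBits, then rescales it back on exit. A quick standalone check of that arithmetic under assumed 32-bit and 16-bit element widths.]

    #include <cassert>

    int main() {
      unsigned ValEltWidth = 32; // original element width in bits
      unsigned NewVTWidth = 16;  // width we can actually store with
      unsigned Idx = 3;          // already stored 3 x i32 worth of data

      // Entering the scalar loop: same bit position, counted in i16 lanes.
      unsigned NarrowIdx = Idx * ValEltWidth / NewVTWidth;
      assert(NarrowIdx == 6);

      // Leaving the loop after two extra i16 stores: back to i32 lanes.
      NarrowIdx += 2;
      unsigned WideIdx = NarrowIdx * NewVTWidth / ValEltWidth;
      assert(WideIdx == 4);
      return 0;
    }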
EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); unsigned NewVTWidth = NewVT.getSizeInBits(); unsigned Increment = NewVTWidth / 8; @@ -3611,10 +3708,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, SDValue EOp = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo().getWithOffset(Offset), - isVolatile, isNonTemporal, - MinAlign(Align, Offset), AAInfo)); + StChain.push_back(DAG.getStore( + Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Offset), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; @@ -3623,28 +3719,27 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { - // Cast the vector to the scalar type we can store + // Cast the vector to the scalar type we can store. unsigned NumElts = ValWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); - // Readjust index position based on new vector type + // Readjust index position based on new vector type. Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo().getWithOffset(Offset), - isVolatile, isNonTemporal, - MinAlign(Align, Offset), AAInfo)); + StChain.push_back(DAG.getStore( + Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), + MinAlign(Align, Offset), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getConstant(Increment, dl, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); - // Restore index back to be relative to the original widen element type + // Restore index back to be relative to the original widen element type. Idx = Idx * NewVTWidth / ValEltWidth; } } @@ -3654,27 +3749,25 @@ void DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // For extension loads, it may not be more efficient to truncate the vector - // and then store it. Instead, we extract each element and then store it. - SDValue Chain = ST->getChain(); - SDValue BasePtr = ST->getBasePtr(); + // and then store it. Instead, we extract each element and then store it. + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); unsigned Align = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); - SDValue ValOp = GetWidenedVector(ST->getValue()); + SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); EVT StVT = ST->getMemoryVT(); EVT ValVT = ValOp.getValueType(); - // It must be true that we the widen vector type is bigger than where - // we need to store. + // It must be true that the wide vector type is bigger than where we need to + // store. 
assert(StVT.isVector() && ValOp.getValueType().isVector()); assert(StVT.bitsLT(ValOp.getValueType())); - // For truncating stores, we can not play the tricks of chopping legal - // vector types and bit cast it to the right type. Instead, we unroll - // the store. + // For truncating stores, we can not play the tricks of chopping legal vector + // types and bitcast it to the right type. Instead, we unroll the store. EVT StEltVT = StVT.getVectorElementType(); EVT ValEltVT = ValVT.getVectorElementType(); unsigned Increment = ValEltVT.getSizeInBits() / 8; @@ -3683,9 +3776,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo(), StEltVT, - isVolatile, isNonTemporal, Align, - AAInfo)); + ST->getPointerInfo(), StEltVT, Align, + MMOFlags, AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), @@ -3695,10 +3787,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, SDValue EOp = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, - ST->getPointerInfo().getWithOffset(Offset), - StEltVT, isVolatile, isNonTemporal, - MinAlign(Align, Offset), AAInfo)); + StChain.push_back(DAG.getTruncStore( + Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), + StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo)); } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 622e06f..1e5c4a7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -37,7 +37,7 @@ static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable use of DFA during scheduling")); -static cl::opt<signed> RegPressureThreshold( +static cl::opt<int> RegPressureThreshold( "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), cl::desc("Track reg pressure and switch priority to in-depth")); @@ -323,8 +323,8 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) { } } -signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { - signed RegBalance = 0; +int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { + int RegBalance = 0; if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) return RegBalance; @@ -357,8 +357,8 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { /// The RawPressure flag makes this function to ignore /// existing reg file sizes, and report raw def/use /// balance. -signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { - signed RegBalance = 0; +int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { + int RegBalance = 0; if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) return RegBalance; @@ -398,9 +398,9 @@ static const unsigned FactorOne = 2; /// Returns single number reflecting benefit of scheduling SU /// in the current cycle. 
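[Note: the ResourcePriorityQueue hunks here replace the keyword spelling 'signed' with the conventional 'int'. Both name exactly the same type, so the change is purely stylistic; this one-liner verifies that.]

    #include <type_traits>

    // 'signed' and 'int' are two spellings of the same type, so the
    // signed -> int rewrite in this file cannot change behavior.
    static_assert(std::is_same<signed, int>::value, "same type, two spellings");

    int main() { return 0; }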
-signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { +int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { // Initial trivial priority. - signed ResCount = 1; + int ResCount = 1; // Do not waste time on a node that is already scheduled. if (SU->isScheduled) @@ -601,7 +601,7 @@ SUnit *ResourcePriorityQueue::pop() { std::vector<SUnit *>::iterator Best = Queue.begin(); if (!DisableDFASched) { - signed BestCost = SUSchedulingCost(*Best); + int BestCost = SUSchedulingCost(*Best); for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index c27f8de..237d541 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H #define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H -#include "llvm/ADT/SmallVector.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Support/DataTypes.h" +#include <utility> namespace llvm { @@ -56,7 +56,8 @@ public: // Constructor for non-constants. SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(indir) { + : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), + IsIndirect(indir) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; @@ -65,7 +66,8 @@ public: // Constructor for constants. SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { + : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), + IsIndirect(false) { kind = CONST; u.Const = C; } @@ -73,7 +75,8 @@ public: // Constructor for frame indices. SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { + : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), + IsIndirect(false) { kind = FRAMEIX; u.FrameIx = FI; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 91024e6..802c459 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -336,8 +336,8 @@ void ScheduleDAGRRList::Schedule() { // Build the scheduling graph. 
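[Note: the SDNodeDbgValue.h hunks above change the constructors to take DebugLoc by value and move it into the member, saving a copy when the caller can hand over its value. A toy version of the same sink idiom, with std::string standing in for the handle type; the class name DbgValue is illustrative.]

    #include <string>
    #include <utility>

    class DbgValue {
      std::string DL; // stand-in for the DebugLoc member
    public:
      explicit DbgValue(std::string dl) : DL(std::move(dl)) {} // move, not copy
      const std::string &loc() const { return DL; }
    };

    int main() {
      std::string Loc = "file.c:42";
      DbgValue V(std::move(Loc)); // the caller may also move its copy in
      return V.loc() == "file.c:42" ? 0 : 1;
    }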
BuildSchedGraph(nullptr); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG(for (SUnit &SU : SUnits) + SU.dumpAll(this)); Topo.InitDAGTopologicalSorting(); AvailableQueue->initNodes(SUnits); @@ -1027,43 +1027,37 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SmallVector<SDep, 4> LoadPreds; SmallVector<SDep, 4> NodePreds; SmallVector<SDep, 4> NodeSuccs; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - ChainPreds.push_back(*I); - else if (isOperandOf(I->getSUnit(), LoadNode)) - LoadPreds.push_back(*I); + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + ChainPreds.push_back(Pred); + else if (isOperandOf(Pred.getSUnit(), LoadNode)) + LoadPreds.push_back(Pred); else - NodePreds.push_back(*I); + NodePreds.push_back(Pred); } - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) - ChainSuccs.push_back(*I); + for (SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) + ChainSuccs.push_back(Succ); else - NodeSuccs.push_back(*I); + NodeSuccs.push_back(Succ); } // Now assign edges to the newly-created nodes. - for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { - const SDep &Pred = ChainPreds[i]; + for (const SDep &Pred : ChainPreds) { RemovePred(SU, Pred); if (isNewLoad) AddPred(LoadSU, Pred); } - for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { - const SDep &Pred = LoadPreds[i]; + for (const SDep &Pred : LoadPreds) { RemovePred(SU, Pred); if (isNewLoad) AddPred(LoadSU, Pred); } - for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { - const SDep &Pred = NodePreds[i]; + for (const SDep &Pred : NodePreds) { RemovePred(SU, Pred); AddPred(NewSU, Pred); } - for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { - SDep D = NodeSuccs[i]; + for (SDep D : NodeSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); @@ -1074,8 +1068,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) --NewSU->NumRegDefsLeft; } - for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { - SDep D = ChainSuccs[i]; + for (SDep D : ChainSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); @@ -1108,29 +1101,27 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { NewSU = CreateClone(SU); // New SUnit has the exact same predecessors. - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) - if (!I->isArtificial()) - AddPred(NewSU, *I); + for (SDep &Pred : SU->Preds) + if (!Pred.isArtificial()) + AddPred(NewSU, Pred); // Only copy scheduled successors. Cut them from old node's successor // list and move them over. 
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isArtificial()) + for (SDep &Succ : SU->Succs) { + if (Succ.isArtificial()) continue; - SUnit *SuccSU = I->getSUnit(); + SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->isScheduled) { - SDep D = *I; + SDep D = Succ; D.setSUnit(NewSU); AddPred(SuccSU, D); D.setSUnit(SU); DelDeps.push_back(std::make_pair(SuccSU, D)); } } - for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) - RemovePred(DelDeps[i].first, DelDeps[i].second); + for (auto &DelDep : DelDeps) + RemovePred(DelDep.first, DelDep.second); AvailableQueue->updateNode(SU); AvailableQueue->addNode(NewSU); @@ -1156,16 +1147,15 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isArtificial()) + for (SDep &Succ : SU->Succs) { + if (Succ.isArtificial()) continue; - SUnit *SuccSU = I->getSUnit(); + SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->isScheduled) { - SDep D = *I; + SDep D = Succ; D.setSUnit(CopyToSU); AddPred(SuccSU, D); - DelDeps.push_back(std::make_pair(SuccSU, *I)); + DelDeps.push_back(std::make_pair(SuccSU, Succ)); } else { // Avoid scheduling the def-side copy before other successors. Otherwise @@ -1174,8 +1164,8 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); } } - for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) - RemovePred(DelDeps[i].first, DelDeps[i].second); + for (auto &DelDep : DelDeps) + RemovePred(DelDep.first, DelDep.second); SDep FromDep(SU, SDep::Data, Reg); FromDep.setLatency(SU->Latency); @@ -1400,16 +1390,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // All candidates are delayed due to live physical reg dependencies. // Try backtracking, code duplication, or inserting cross class copies // to resolve it. - for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { - SUnit *TrySU = Interferences[i]; + for (SUnit *TrySU : Interferences) { SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; // Try unscheduling up to the point where it's safe to schedule // this node. 
SUnit *BtSU = nullptr; unsigned LiveCycle = UINT_MAX; - for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { - unsigned Reg = LRegs[j]; + for (unsigned Reg : LRegs) { if (LiveRegGens[Reg]->getHeight() < LiveCycle) { BtSU = LiveRegGens[Reg]; LiveCycle = BtSU->getHeight(); @@ -1854,10 +1842,9 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { return SethiUllmanNumber; unsigned Extra = 0; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - SUnit *PredSU = I->getSUnit(); + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); if (PredSethiUllman > SethiUllmanNumber) { SethiUllmanNumber = PredSethiUllman; @@ -1879,8 +1866,8 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { void RegReductionPQBase::CalculateSethiUllmanNumbers() { SethiUllmanNumbers.assign(SUnits->size(), 0); - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) - CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); + for (const SUnit &SU : *SUnits) + CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers); } void RegReductionPQBase::addNode(const SUnit *SU) { @@ -1956,11 +1943,10 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { if (!TLI) return false; - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; - SUnit *PredSU = I->getSUnit(); + SUnit *PredSU = Pred.getSUnit(); // NumRegDefsLeft is zero when enough uses of this node have been scheduled // to cover the number of registers defined (they are all live). if (PredSU->NumRegDefsLeft == 0) { @@ -2006,11 +1992,10 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { LiveUses = 0; int PDiff = 0; - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; - SUnit *PredSU = I->getSUnit(); + SUnit *PredSU = Pred.getSUnit(); // NumRegDefsLeft is zero when enough uses of this node have been scheduled // to cover the number of registers defined (they are all live). if (PredSU->NumRegDefsLeft == 0) { @@ -2050,11 +2035,10 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) { if (!SU->getNode()) return; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; - SUnit *PredSU = I->getSUnit(); + SUnit *PredSU = Pred.getSUnit(); // NumRegDefsLeft is zero when enough uses of this node have been scheduled // to cover the number of registers defined (they are all live). if (PredSU->NumRegDefsLeft == 0) { @@ -2132,11 +2116,10 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { return; } - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; - SUnit *PredSU = I->getSUnit(); + SUnit *PredSU = Pred.getSUnit(); // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only // counts data deps. 
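[Note: CalcNodeSethiUllmanNumber, modernized in this hunk, generalizes the classic Sethi-Ullman register-need bound to DAG predecessors. For a binary expression tree the textbook recurrence is: a leaf needs one register, an interior node needs max(l, r), or l + 1 when the children tie. A standalone re-creation on a toy tree.]

    #include <algorithm>
    #include <cassert>

    struct Node {
      const Node *L, *R;
    };

    static unsigned sethiUllman(const Node *N) {
      if (!N->L)                     // leaves here have both children null
        return 1;                    // one register to hold the value
      unsigned l = sethiUllman(N->L), r = sethiUllman(N->R);
      return l == r ? l + 1 : std::max(l, r);
    }

    int main() {
      Node a{nullptr, nullptr}, b = a, c = a, d = a; // four leaves
      Node ab{&a, &b}, cd{&c, &d};
      Node root{&ab, &cd};                           // (a op b) op (c op d)
      assert(sethiUllman(&ab) == 2);
      assert(sethiUllman(&root) == 3); // balanced trees are the worst case
      return 0;
    }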
if (PredSU->NumSuccsLeft != PredSU->Succs.size()) @@ -2201,15 +2184,14 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { /// closest to the current cycle. static unsigned closestSucc(const SUnit *SU) { unsigned MaxHeight = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain succs - unsigned Height = I->getSUnit()->getHeight(); + for (const SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) continue; // ignore chain succs + unsigned Height = Succ.getSUnit()->getHeight(); // If there are bunch of CopyToRegs stacked up, they should be considered // to be at the same position. - if (I->getSUnit()->getNode() && - I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) - Height = closestSucc(I->getSUnit())+1; + if (Succ.getSUnit()->getNode() && + Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) + Height = closestSucc(Succ.getSUnit())+1; if (Height > MaxHeight) MaxHeight = Height; } @@ -2220,9 +2202,8 @@ static unsigned closestSucc(const SUnit *SU) { /// for scratch registers, i.e. number of data dependencies. static unsigned calcMaxScratches(const SUnit *SU) { unsigned Scratches = 0; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds Scratches++; } return Scratches; @@ -2232,10 +2213,9 @@ static unsigned calcMaxScratches(const SUnit *SU) { /// CopyFromReg from a virtual register. static bool hasOnlyLiveInOpers(const SUnit *SU) { bool RetVal = false; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; - const SUnit *PredSU = I->getSUnit(); + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; + const SUnit *PredSU = Pred.getSUnit(); if (PredSU->getNode() && PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { unsigned Reg = @@ -2255,10 +2235,9 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) { /// it has no other use. It should be scheduled closer to the terminator. 
static bool hasOnlyLiveOutUses(const SUnit *SU) { bool RetVal = false; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) continue; - const SUnit *SuccSU = I->getSUnit(); + for (const SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) continue; + const SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { unsigned Reg = cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); @@ -2293,10 +2272,9 @@ static void initVRegCycle(SUnit *SU) { SU->isVRegCycle = true; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; - I->getSUnit()->isVRegCycle = true; + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; + Pred.getSUnit()->isVRegCycle = true; } } @@ -2306,14 +2284,13 @@ static void resetVRegCycle(SUnit *SU) { if (!SU->isVRegCycle) return; - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - SUnit *PredSU = I->getSUnit(); + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds + SUnit *PredSU = Pred.getSUnit(); if (PredSU->isVRegCycle) { assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && "VRegCycle def must be CopyFromReg"); - I->getSUnit()->isVRegCycle = 0; + Pred.getSUnit()->isVRegCycle = false; } } } @@ -2325,11 +2302,10 @@ static bool hasVRegCycleUse(const SUnit *SU) { if (SU->isVRegCycle) return false; - for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - if (I->getSUnit()->isVRegCycle && - I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { + for (const SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; // ignore chain preds + if (Pred.getSUnit()->isVRegCycle && + Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); return true; } @@ -2684,11 +2660,9 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { CalculateSethiUllmanNumbers(); // For single block loops, mark nodes that look like canonical IV increments. 
- if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { - for (unsigned i = 0, e = sunits.size(); i != e; ++i) { - initVRegCycle(&sunits[i]); - } - } + if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) + for (SUnit &SU : sunits) + initVRegCycle(&SU); } //===----------------------------------------------------------------------===// @@ -2726,16 +2700,15 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, if(!ImpDefs && !RegMask) return false; - for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); - SI != SE; ++SI) { - SUnit *SuccSU = SI->getSUnit(); - for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(), - PE = SuccSU->Preds.end(); PI != PE; ++PI) { - if (!PI->isAssignedRegDep()) + for (const SDep &Succ : SU->Succs) { + SUnit *SuccSU = Succ.getSUnit(); + for (const SDep &SuccPred : SuccSU->Preds) { + if (!SuccPred.isAssignedRegDep()) continue; - if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && - scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + if (RegMask && + MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) && + scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) return true; if (ImpDefs) @@ -2743,8 +2716,8 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, // Return true if SU clobbers this physical register use and the // definition of the register reaches from DepSU. IsReachable queries // a topological forward sort of the DAG (following the successors). - if (TRI->regsOverlap(*ImpDef, PI->getReg()) && - scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) && + scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) return true; } } @@ -2823,19 +2796,18 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, /// void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Visit all the nodes in topological order, working top-down. - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { - SUnit *SU = &(*SUnits)[i]; + for (SUnit &SU : *SUnits) { // For now, only look at nodes with no data successors, such as stores. // These are especially important, due to the heuristics in // getNodePriority for nodes with no data successors. - if (SU->NumSuccs != 0) + if (SU.NumSuccs != 0) continue; // For now, only look at nodes with exactly one data predecessor. - if (SU->NumPreds != 1) + if (SU.NumPreds != 1) continue; // Avoid prescheduling copies to virtual registers, which don't behave // like other nodes from the perspective of scheduling heuristics. - if (SDNode *N = SU->getNode()) + if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyToReg && TargetRegisterInfo::isVirtualRegister (cast<RegisterSDNode>(N->getOperand(1))->getReg())) @@ -2843,10 +2815,9 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Locate the single data predecessor. SUnit *PredSU = nullptr; - for (SUnit::const_pred_iterator II = SU->Preds.begin(), - EE = SU->Preds.end(); II != EE; ++II) - if (!II->isCtrl()) { - PredSU = II->getSUnit(); + for (const SDep &Pred : SU.Preds) + if (!Pred.isCtrl()) { + PredSU = Pred.getSUnit(); break; } assert(PredSU); @@ -2860,44 +2831,43 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { continue; // Avoid prescheduling to copies from virtual registers, which don't behave // like other nodes from the perspective of scheduling heuristics. 
- if (SDNode *N = SU->getNode()) + if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyFromReg && TargetRegisterInfo::isVirtualRegister (cast<RegisterSDNode>(N->getOperand(1))->getReg())) continue; // Perform checks on the successors of PredSU. - for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), - EE = PredSU->Succs.end(); II != EE; ++II) { - SUnit *PredSuccSU = II->getSUnit(); - if (PredSuccSU == SU) continue; + for (const SDep &PredSucc : PredSU->Succs) { + SUnit *PredSuccSU = PredSucc.getSUnit(); + if (PredSuccSU == &SU) continue; // If PredSU has another successor with no data successors, for // now don't attempt to choose either over the other. if (PredSuccSU->NumSuccs == 0) goto outer_loop_continue; // Don't break physical register dependencies. - if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) - if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) + if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) + if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI)) goto outer_loop_continue; // Don't introduce graph cycles. - if (scheduleDAG->IsReachable(SU, PredSuccSU)) + if (scheduleDAG->IsReachable(&SU, PredSuccSU)) goto outer_loop_continue; } // Ok, the transformation is safe and the heuristics suggest it is // profitable. Update the graph. - DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum + DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #" << PredSU->NodeNum << " to guide scheduling in the presence of multiple uses\n"); for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { SDep Edge = PredSU->Succs[i]; assert(!Edge.isAssignedRegDep()); SUnit *SuccSU = Edge.getSUnit(); - if (SuccSU != SU) { + if (SuccSU != &SU) { Edge.setSUnit(PredSU); scheduleDAG->RemovePred(SuccSU, Edge); - scheduleDAG->AddPred(SU, Edge); - Edge.setSUnit(SU); + scheduleDAG->AddPred(&SU, Edge); + Edge.setSUnit(&SU); scheduleDAG->AddPred(SuccSU, Edge); --i; } @@ -2914,16 +2884,15 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { /// If both are two-address, but one is commutable while the other is not /// commutable, favor the one that's not commutable. 
void RegReductionPQBase::AddPseudoTwoAddrDeps() { - for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { - SUnit *SU = &(*SUnits)[i]; - if (!SU->isTwoAddress) + for (SUnit &SU : *SUnits) { + if (!SU.isTwoAddress) continue; - SDNode *Node = SU->getNode(); - if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode()) + SDNode *Node = SU.getNode(); + if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode()) continue; - bool isLiveOut = hasOnlyLiveOutUses(SU); + bool isLiveOut = hasOnlyLiveOutUses(&SU); unsigned Opc = Node->getMachineOpcode(); const MCInstrDesc &MCID = TII->get(Opc); unsigned NumRes = MCID.getNumDefs(); @@ -2931,21 +2900,22 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { for (unsigned j = 0; j != NumOps; ++j) { if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) continue; - SDNode *DU = SU->getNode()->getOperand(j).getNode(); + SDNode *DU = SU.getNode()->getOperand(j).getNode(); if (DU->getNodeId() == -1) continue; const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; - if (!DUSU) continue; - for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), - E = DUSU->Succs.end(); I != E; ++I) { - if (I->isCtrl()) continue; - SUnit *SuccSU = I->getSUnit(); - if (SuccSU == SU) + if (!DUSU) + continue; + for (const SDep &Succ : DUSU->Succs) { + if (Succ.isCtrl()) + continue; + SUnit *SuccSU = Succ.getSUnit(); + if (SuccSU == &SU) continue; // Be conservative. Ignore if nodes aren't at roughly the same // depth and height. - if (SuccSU->getHeight() < SU->getHeight() && - (SU->getHeight() - SuccSU->getHeight()) > 1) + if (SuccSU->getHeight() < SU.getHeight() && + (SU.getHeight() - SuccSU->getHeight()) > 1) continue; // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge // constrains whatever is using the copy, instead of the copy @@ -2961,8 +2931,8 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { continue; // Don't constrain nodes with physical register defs if the // predecessor can clobber them. 
- if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { - if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) + if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) { + if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI)) continue; } // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; @@ -2972,14 +2942,14 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { SuccOpc == TargetOpcode::INSERT_SUBREG || SuccOpc == TargetOpcode::SUBREG_TO_REG) continue; - if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && + if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) && (!canClobber(SuccSU, DUSU) || (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || - (!SU->isCommutable && SuccSU->isCommutable)) && - !scheduleDAG->IsReachable(SuccSU, SU)) { + (!SU.isCommutable && SuccSU->isCommutable)) && + !scheduleDAG->IsReachable(SuccSU, &SU)) { DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" - << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); - scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial)); + << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); + scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial)); } } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 2a6c853..3be622f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -321,7 +321,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Add all nodes in depth first order. SmallVector<SDNode*, 64> Worklist; - SmallPtrSet<SDNode*, 64> Visited; + SmallPtrSet<SDNode*, 32> Visited; Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); @@ -750,7 +750,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, return; } - Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos()))); + Orders.push_back(std::make_pair(Order, &*std::prev(Emitter.getInsertPos()))); ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 893871f..29d11c7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -33,7 +34,6 @@ #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" @@ -46,7 +46,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cmath> @@ -94,8 +93,22 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, // ISD Namespace //===----------------------------------------------------------------------===// -/// isBuildVectorAllOnes - Return true if the specified node is a -/// BUILD_VECTOR where all of the elements are ~0 or undef. 
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { + auto *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) && + EltVT.getSizeInBits() >= SplatBitSize; +} + +// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be +// specializations of the more general isConstantSplatVector()? + bool ISD::isBuildVectorAllOnes(const SDNode *N) { // Look through a bit convert. while (N->getOpcode() == ISD::BITCAST) @@ -106,7 +119,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { unsigned i = 0, e = N->getNumOperands(); // Skip over all of the undef values. - while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + while (i != e && N->getOperand(i).isUndef()) ++i; // Do not accept an all-undef vector. @@ -135,15 +148,11 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { // undefs. Even with the above element type twiddling, this should be OK, as // the same type legalization should have applied to all the elements. for (++i; i != e; ++i) - if (N->getOperand(i) != NotZero && - N->getOperand(i).getOpcode() != ISD::UNDEF) + if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef()) return false; return true; } - -/// isBuildVectorAllZeros - Return true if the specified node is a -/// BUILD_VECTOR where all of the elements are 0 or undef. bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Look through a bit convert. while (N->getOpcode() == ISD::BITCAST) @@ -153,7 +162,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { bool IsAllUndef = true; for (const SDValue &Op : N->op_values()) { - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) continue; IsAllUndef = false; // Do not accept build_vectors that aren't all constants or which have non-0 @@ -181,14 +190,12 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { return true; } -/// \brief Return true if the specified node is a BUILD_VECTOR node of -/// all ConstantSDNode or undef. bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; for (const SDValue &Op : N->op_values()) { - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) continue; if (!isa<ConstantSDNode>(Op)) return false; @@ -196,14 +203,12 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { return true; } -/// \brief Return true if the specified node is a BUILD_VECTOR node of -/// all ConstantFPSDNode or undef. bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; for (const SDValue &Op : N->op_values()) { - if (Op.getOpcode() == ISD::UNDEF) + if (Op.isUndef()) continue; if (!isa<ConstantFPSDNode>(Op)) return false; @@ -211,8 +216,6 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { return true; } -/// allOperandsUndef - Return true if the node has at least one operand -/// and all operands of the specified node are ISD::UNDEF. bool ISD::allOperandsUndef(const SDNode *N) { // Return false if the node has no operands. 
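[Note: throughout this file the patch replaces Op.getOpcode() == ISD::UNDEF with the Op.isUndef() convenience. A toy version of allOperandsUndef after that cleanup, which also preserves the empty-operand-list quirk that the comment continuing below explains; the Value struct is an invented stand-in for SDValue.]

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Value {
      bool Undef;
      bool isUndef() const { return Undef; } // was: getOpcode() == ISD::UNDEF
    };

    static bool allOperandsUndef(const std::vector<Value> &Ops) {
      if (Ops.empty())
        return false; // vacuous truth would mislead the callers
      return std::all_of(Ops.begin(), Ops.end(),
                         [](const Value &Op) { return Op.isUndef(); });
    }

    int main() {
      assert(!allOperandsUndef({}));
      assert(allOperandsUndef({{true}, {true}}));
      assert(!allOperandsUndef({{true}, {false}}));
      return 0;
    }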
// This is "logically inconsistent" with the definition of "all" but @@ -221,7 +224,7 @@ bool ISD::allOperandsUndef(const SDNode *N) { return false; for (const SDValue &Op : N->op_values()) - if (Op.getOpcode() != ISD::UNDEF) + if (!Op.isUndef()) return false; return true; @@ -242,8 +245,6 @@ ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { llvm_unreachable("Invalid LoadExtType"); } -/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) -/// when given the operation for (X op Y). ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { // To perform this operation, we just need to swap the L and G bits of the // operation. @@ -254,8 +255,6 @@ ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { (OldG << 2)); // New L bit. } -/// getSetCCInverse - Return the operation corresponding to !(X op Y), where -/// 'op' is a valid SetCC operation. ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { unsigned Operation = Op; if (isInteger) @@ -270,9 +269,9 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { } -/// isSignedOp - For an integer comparison, return 1 if the comparison is a -/// signed operation and 2 if the result is an unsigned comparison. Return zero -/// if the operation does not depend on the sign of the input (setne and seteq). +/// For an integer comparison, return 1 if the comparison is a signed operation +/// and 2 if the result is an unsigned comparison. Return zero if the operation +/// does not depend on the sign of the input (setne and seteq). static int isSignedOp(ISD::CondCode Opcode) { switch (Opcode) { default: llvm_unreachable("Illegal integer setcc operation!"); @@ -289,10 +288,6 @@ static int isSignedOp(ISD::CondCode Opcode) { } } -/// getSetCCOrOperation - Return the result of a logical OR between different -/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function -/// returns SETCC_INVALID if it is not possible to represent the resultant -/// comparison. ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, bool isInteger) { if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) @@ -313,10 +308,6 @@ ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, return ISD::CondCode(Op); } -/// getSetCCAndOperation - Return the result of a logical AND between different -/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This -/// function returns zero if it is not possible to represent the resultant -/// comparison. ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, bool isInteger) { if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) @@ -644,7 +635,8 @@ void SelectionDAG::DeleteNode(SDNode *N) { } void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { - assert(N != AllNodes.begin() && "Cannot delete the entry node!"); + assert(N->getIterator() != AllNodes.begin() && + "Cannot delete the entry node!"); assert(N->use_empty() && "Cannot delete a node that is not dead!"); // Drop all of the operands and decrement used node's use counts. @@ -663,8 +655,8 @@ void SDDbgInfo::erase(const SDNode *Node) { } void SelectionDAG::DeallocateNode(SDNode *N) { - if (N->OperandsNeedDelete) - delete[] N->OperandList; + // If we have operands, deallocate them. + removeOperands(N); // Set the opcode to DELETED_NODE to help catch bugs when node // memory is reallocated. 
@@ -832,7 +824,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) if (const SDNodeFlags *Flags = N->getFlags()) Node->intersectFlagsWith(Flags); @@ -853,7 +845,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) if (const SDNodeFlags *Flags = N->getFlags()) Node->intersectFlagsWith(Flags); @@ -873,16 +865,13 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); - SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) if (const SDNodeFlags *Flags = N->getFlags()) Node->intersectFlagsWith(Flags); return Node; } -/// getEVTAlignment - Compute the default alignment value for the -/// given type. -/// unsigned SelectionDAG::getEVTAlignment(EVT VT) const { Type *Ty = VT == MVT::iPTR ? PointerType::get(Type::getInt8Ty(*getContext()), 0) : @@ -911,6 +900,7 @@ void SelectionDAG::init(MachineFunction &mf) { SelectionDAG::~SelectionDAG() { assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); allnodes_clear(); + OperandRecycler.clear(OperandAllocator); delete DbgInfo; } @@ -924,24 +914,26 @@ void SelectionDAG::allnodes_clear() { #endif } -BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, - SDVTList VTs, SDValue N1, - SDValue N2, - const SDNodeFlags *Flags) { +SDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, const SDLoc &DL, + SDVTList VTs, SDValue N1, SDValue N2, + const SDNodeFlags *Flags) { + SDValue Ops[] = {N1, N2}; + if (isBinOpWithFlags(Opcode)) { // If no flags were passed in, use a default flags object. SDNodeFlags F; if (Flags == nullptr) Flags = &F; - BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( - Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, *Flags); + auto *FN = newSDNode<BinaryWithFlagsSDNode>(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, *Flags); + createOperands(FN, Ops); return FN; } - BinarySDNode *N = new (NodeAllocator) - BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); return N; } @@ -961,19 +953,25 @@ SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, } SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, - DebugLoc DL, void *&InsertPos) { + const SDLoc &DL, void *&InsertPos) { SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); if (N) { switch (N->getOpcode()) { - default: break; // Process only regular (non-target) constant nodes. case ISD::Constant: case ISD::ConstantFP: // Erase debug location from the node if the node is used at several - // different places to do not propagate one location to all uses as it - // leads to incorrect debug info. - if (N->getDebugLoc() != DL) + // different places. Do not propagate one location to all uses as it + // will cause a worse single stepping debugging experience. 
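// [Editor's note, a concrete reading of the two CSE cases in this hunk: a
// Constant/ConstantFP node reused from a source location other than the one it
// was created at drops its DebugLoc entirely, so single-stepping never lands
// on an arbitrary one of its uses; any other CSE hit keeps the earliest
// location by IR order — e.g. a node built for order 12 and hit again from
// order 7 moves to order 7's location, since 7 < 12.]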
+ if (N->getDebugLoc() != DL.getDebugLoc()) N->setDebugLoc(DebugLoc()); break; + default: + // When the node's point of use is located earlier in the instruction + // sequence than its prior point of use, update its debug info to the + // earlier location. + if (DL.getIROrder() && DL.getIROrder() < N->getIROrder()) + N->setDebugLoc(DL.getDebugLoc()); + break; } } return N; @@ -981,6 +979,7 @@ SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, void SelectionDAG::clear() { allnodes_clear(); + OperandRecycler.clear(OperandAllocator); OperandAllocator.Reset(); CSEMap.clear(); @@ -999,25 +998,25 @@ void SelectionDAG::clear() { DbgInfo->clear(); } -SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::SIGN_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ZERO_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, +SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT) { if (VT.bitsLE(Op.getValueType())) return getNode(ISD::TRUNCATE, SL, VT, Op); @@ -1026,7 +1025,7 @@ SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, return getNode(TLI->getExtendForContent(BType), SL, VT, Op); } -SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(!VT.isVector() && "getZeroExtendInReg should use the vector element type instead of " "the vector type!"); @@ -1038,7 +1037,8 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { getConstant(Imm, DL, Op.getValueType())); } -SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL, + EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && "The sizes of the input and result must match in order to perform the " @@ -1048,7 +1048,8 @@ SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op); } -SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL, + EVT VT) { assert(VT.isVector() && "This DAG node is restricted to vector types."); assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && "The sizes of the input and result must match in order to perform the " @@ -1058,7 +1059,8 @@ SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op); } -SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { +SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, + EVT VT) { assert(VT.isVector() && "This DAG node is 
restricted to vector types."); assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && "The sizes of the input and result must match in order to perform the " @@ -1070,14 +1072,14 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// -SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { +SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); return getNode(ISD::XOR, DL, VT, Val, NegOne); } -SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { +SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue TrueValue; switch (TLI->getBooleanContents(VT)) { @@ -1093,8 +1095,8 @@ SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { return getNode(ISD::XOR, DL, VT, Val, TrueValue); } -SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT, - bool isO) { +SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT, + bool isT, bool isO) { EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && @@ -1102,14 +1104,13 @@ SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT, return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO); } -SDValue SelectionDAG::getConstant(const APInt &Val, SDLoc DL, EVT VT, bool isT, - bool isO) -{ +SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT, + bool isT, bool isO) { return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, - bool isT, bool isO) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, + EVT VT, bool isT, bool isO) { assert(VT.isInteger() && "Cannot create FP integer constant!"); EVT EltVT = VT.getScalarType(); @@ -1134,7 +1135,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, else if (NewNodesMustHaveLegalTypes && VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypeExpandInteger) { - APInt NewVal = Elt->getValue(); + const APInt &NewVal = Elt->getValue(); EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; @@ -1168,9 +1169,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); - SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT, - getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT, - Ops)); + SDValue Result = getNode(ISD::BITCAST, DL, VT, + getNode(ISD::BUILD_VECTOR, DL, ViaVecVT, Ops)); return Result; } @@ -1183,37 +1183,34 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, ID.AddBoolean(isO); void *IP = nullptr; SDNode *N = nullptr; - if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) + if ((N = FindNodeOrInsertPos(ID, DL, IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, DL.getDebugLoc(), - EltVT); + N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } SDValue 
Result(N, 0); - if (VT.isVector()) { - SmallVector<SDValue, 8> Ops; - Ops.assign(VT.getVectorNumElements(), Result); - Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); - } + if (VT.isVector()) + Result = getSplatBuildVector(VT, DL, Result); return Result; } -SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) { +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, + bool isTarget) { return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); } -SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT, +SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, bool isTarget) { return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); } -SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT, - bool isTarget){ +SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, + EVT VT, bool isTarget) { assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); EVT EltVT = VT.getScalarType(); @@ -1227,47 +1224,42 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT, ID.AddPointer(&V); void *IP = nullptr; SDNode *N = nullptr; - if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) + if ((N = FindNodeOrInsertPos(ID, DL, IP))) if (!VT.isVector()) return SDValue(N, 0); if (!N) { - N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, DL.getDebugLoc(), - EltVT); + N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } SDValue Result(N, 0); - if (VT.isVector()) { - SmallVector<SDValue, 8> Ops; - Ops.assign(VT.getVectorNumElements(), Result); - Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); - } + if (VT.isVector()) + Result = getSplatBuildVector(VT, DL, Result); return Result; } -SDValue SelectionDAG::getConstantFP(double Val, SDLoc DL, EVT VT, +SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget) { EVT EltVT = VT.getScalarType(); - if (EltVT==MVT::f32) + if (EltVT == MVT::f32) return getConstantFP(APFloat((float)Val), DL, VT, isTarget); - else if (EltVT==MVT::f64) + else if (EltVT == MVT::f64) return getConstantFP(APFloat(Val), DL, VT, isTarget); - else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 || - EltVT==MVT::f16) { - bool ignored; - APFloat apf = APFloat(Val); - apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, - &ignored); - return getConstantFP(apf, DL, VT, isTarget); + else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 || + EltVT == MVT::f16) { + bool Ignored; + APFloat APF = APFloat(Val); + APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + &Ignored); + return getConstantFP(APF, DL, VT, isTarget); } else llvm_unreachable("Unsupported type in getConstantFP"); } -SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, - EVT VT, int64_t Offset, - bool isTargetGA, +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, + EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); @@ -1290,12 +1282,11 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, ID.AddInteger(TargetFlags); ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, 
IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(), - DL.getDebugLoc(), GV, VT, - Offset, TargetFlags); + auto *N = newSDNode<GlobalAddressSDNode>( + Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1310,7 +1301,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); + auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1329,8 +1320,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, - TargetFlags); + auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1355,8 +1345,8 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, - Alignment, TargetFlags); + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, + TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1382,8 +1372,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, - Alignment, TargetFlags); + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, + TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1400,8 +1390,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = - new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags); + auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1415,7 +1404,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); + auto *N = newSDNode<BasicBlockSDNode>(MBB); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1430,7 +1419,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; if (N) return SDValue(N, 0); - N = new (NodeAllocator) VTSDNode(VT); + N = newSDNode<VTSDNode>(VT); InsertNode(N); return SDValue(N, 0); } @@ -1438,7 +1427,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); - N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT); + N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT); InsertNode(N); return SDValue(N, 0); } @@ -1447,7 +1436,7 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { SDNode *&N = MCSymbols[Sym]; if (N) return SDValue(N, 0); - N = new (NodeAllocator) MCSymbolSDNode(Sym, VT); + N = newSDNode<MCSymbolSDNode>(Sym, VT); InsertNode(N); return SDValue(N, 0); } @@ -1458,7 +1447,7 @@ SDValue 
SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, TargetFlags)]; if (N) return SDValue(N, 0); - N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); + N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT); InsertNode(N); return SDValue(N, 0); } @@ -1468,7 +1457,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { CondCodeNodes.resize(Cond+1); if (!CondCodeNodes[Cond]) { - CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); + auto *N = newSDNode<CondCodeSDNode>(Cond); CondCodeNodes[Cond] = N; InsertNode(N); } @@ -1476,41 +1465,42 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { return SDValue(CondCodeNodes[Cond], 0); } -// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in -// the shuffle mask M that point at N1 to point at N2, and indices that point -// N2 to point at N1. -static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { +/// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that +/// point at N1 to point at N2 and indices that point at N2 to point at N1. +static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) { std::swap(N1, N2); ShuffleVectorSDNode::commuteMask(M); } -SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, - SDValue N2, const int *Mask) { +SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, + SDValue N2, ArrayRef<int> Mask) { + assert(VT.getVectorNumElements() == Mask.size() && + "Must have the same number of vector elements as mask elements!"); assert(VT == N1.getValueType() && VT == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); // Canonicalize shuffle undef, undef -> undef - if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) + if (N1.isUndef() && N2.isUndef()) return getUNDEF(VT); // Validate that all indices in Mask are within the range of the elements // input to the shuffle. - unsigned NElts = VT.getVectorNumElements(); - SmallVector<int, 8> MaskVec; - for (unsigned i = 0; i != NElts; ++i) { - assert(Mask[i] < (int)(NElts * 2) && "Index out of range"); - MaskVec.push_back(Mask[i]); - } + int NElts = Mask.size(); + assert(all_of(Mask, [&](int M) { return M < (NElts * 2); }) && + "Index out of range"); + + // Copy the mask so we can do any needed cleanup. + SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end()); // Canonicalize shuffle v, v -> v, undef if (N1 == N2) { N2 = getUNDEF(VT); - for (unsigned i = 0; i != NElts; ++i) - if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts; + for (int i = 0; i != NElts; ++i) + if (MaskVec[i] >= NElts) MaskVec[i] -= NElts; } // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) commuteShuffle(N1, N2, MaskVec); // If shuffling a splat, try to blend the splat instead. We do this here so @@ -1521,8 +1511,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (!Splat) return; - for (int i = 0; i < (int)NElts; ++i) { - if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts)) + for (int i = 0; i < NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) continue; // If this input comes from undef, mark it as such. 
@@ -1544,9 +1534,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef bool AllLHS = true, AllRHS = true; - bool N2Undef = N2.getOpcode() == ISD::UNDEF; - for (unsigned i = 0; i != NElts; ++i) { - if (MaskVec[i] >= (int)NElts) { + bool N2Undef = N2.isUndef(); + for (int i = 0; i != NElts; ++i) { + if (MaskVec[i] >= NElts) { if (N2Undef) MaskVec[i] = -1; else @@ -1564,15 +1554,15 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, commuteShuffle(N1, N2, MaskVec); } // Reset our undef status after accounting for the mask. - N2Undef = N2.getOpcode() == ISD::UNDEF; + N2Undef = N2.isUndef(); // Re-check whether both sides ended up undef. - if (N1.getOpcode() == ISD::UNDEF && N2Undef) + if (N1.isUndef() && N2Undef) return getUNDEF(VT); // If Identity shuffle return that node. bool Identity = true, AllSame = true; - for (unsigned i = 0; i != NElts; ++i) { - if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + for (int i = 0; i != NElts; ++i) { + if (MaskVec[i] >= 0 && MaskVec[i] != i) Identity = false; if (MaskVec[i] != MaskVec[0]) AllSame = false; } if (Identity && NElts) @@ -1592,7 +1582,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, BitVector UndefElements; SDValue Splat = BV->getSplatValue(&UndefElements); // If this is a splat of an undef, shuffling it is also undef. - if (Splat && Splat.getOpcode() == ISD::UNDEF) + if (Splat && Splat.isUndef()) return getUNDEF(VT); bool SameNumElts = @@ -1612,11 +1602,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, // If the shuffle itself creates a splat, build the vector directly. if (AllSame && SameNumElts) { - const SDValue &Splatted = BV->getOperand(MaskVec[0]); - SmallVector<SDValue, 8> Ops(NElts, Splatted); - EVT BuildVT = BV->getValueType(0); - SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops); + const SDValue &Splatted = BV->getOperand(MaskVec[0]); + SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted); // We may have jumped through bitcasts, so the type of the // BUILD_VECTOR may not match the type of the shuffle. @@ -1630,23 +1618,23 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops); - for (unsigned i = 0; i != NElts; ++i) + for (int i = 0; i != NElts; ++i) ID.AddInteger(MaskVec[i]); void* IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); // Allocate the mask array for the node out of the BumpPtrAllocator, since // SDNode doesn't have access to it. This memory will be "leaked" when // the node is deallocated, but recovered when the NodeAllocator is released. 
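// [Editor's note: with the ArrayRef<int> signature above, callers no longer
// pass a raw `const int *` mask. A usage sketch — DAG, DL, VT, V1 and V2 are
// assumed from the caller's context, and VT must have as many elements as the
// mask (four here) to satisfy the new assertion:]

SmallVector<int, 8> Mask = {0, 4, 1, 5};  // interleave the low halves
SDValue Shuf = DAG.getVectorShuffle(VT, DL, V1, V2, Mask);

// [A braced initializer list also binds to ArrayRef<int>, so
// DAG.getVectorShuffle(VT, DL, V1, V2, {0, 4, 1, 5}) works as well.]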
int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); - memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); + std::copy(MaskVec.begin(), MaskVec.end(), MaskAlloc); + + auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(), + dl.getDebugLoc(), MaskAlloc); + createOperands(N, Ops); - ShuffleVectorSDNode *N = - new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), - dl.getDebugLoc(), N1, N2, - MaskAlloc); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1659,13 +1647,12 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { SDValue Op0 = SV.getOperand(0); SDValue Op1 = SV.getOperand(1); - return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, &MaskVec[0]); + return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec); } -SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, - SDValue Val, SDValue DTy, - SDValue STy, SDValue Rnd, SDValue Sat, - ISD::CvtCode Code) { +SDValue SelectionDAG::getConvertRndSat(EVT VT, const SDLoc &dl, SDValue Val, + SDValue DTy, SDValue STy, SDValue Rnd, + SDValue Sat, ISD::CvtCode Code) { // If the src and dest types are the same and the conversion is between // integer types of the same sign or two floats, no conversion is necessary. if (DTy == STy && @@ -1676,12 +1663,13 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); void* IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); - CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), - dl.getDebugLoc(), - Ops, Code); + auto *N = + newSDNode<CvtRndSatSDNode>(VT, dl.getIROrder(), dl.getDebugLoc(), Code); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1695,7 +1683,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); + auto *N = newSDNode<RegisterSDNode>(RegNo, VT); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1709,13 +1697,14 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); + auto *N = newSDNode<RegisterMaskSDNode>(RegMask); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { +SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root, + MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops); @@ -1724,14 +1713,14 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), - dl.getDebugLoc(), Root, Label); + auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } - SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset, bool isTarget, @@ -1747,8 +1736,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new 
(NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, - TargetFlags); + auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1766,13 +1754,12 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) SrcValueSDNode(V); + auto *N = newSDNode<SrcValueSDNode>(V); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -/// getMDNode - Return an MDNodeSDNode which holds an MDNode. SDValue SelectionDAG::getMDNode(const MDNode *MD) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None); @@ -1782,7 +1769,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); + auto *N = newSDNode<MDNodeSDNode>(MD); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1795,8 +1782,7 @@ SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) { return getNode(ISD::BITCAST, SDLoc(V), VT, V); } -/// getAddrSpaceCast - Return an AddrSpaceCastSDNode. -SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, +SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS) { SDValue Ops[] = {Ptr}; FoldingSetNodeID ID; @@ -1805,12 +1791,13 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, ID.AddInteger(DestAS); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), - dl.getDebugLoc(), - VT, Ptr, SrcAS, DestAS); + auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(), + VT, SrcAS, DestAS); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1835,9 +1822,8 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) { SDValue Tmp2 = Node->getOperand(1); unsigned Align = Node->getConstantOperandVal(3); - SDValue VAListLoad = - getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, false, 0); + SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, + Tmp2, MachinePointerInfo(V)); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -1856,11 +1842,10 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) { VT.getTypeForEVT(*getContext())), dl, VAList.getValueType())); // Store the incremented VAList to the legalized pointer - Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, 0); + Tmp1 = + getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V)); // Load the actual argument out of the pointer VAList - return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(), - false, false, false, 0); + return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo()); } SDValue SelectionDAG::expandVACopy(SDNode *Node) { @@ -1870,15 +1855,13 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) { // output, returning the chain. 
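// [Editor's note: the load/store calls just below show a wider cleanup in this
// patch — the trailing runs of bool/alignment parameters are gone from getLoad
// and getStore, and the common case passes only a MachinePointerInfo:
//   old: getLoad(VT, dl, Chain, Ptr, MachinePointerInfo(V),
//                false, false, false, 0);
//   new: getLoad(VT, dl, Chain, Ptr, MachinePointerInfo(V));
// Non-default properties travel as MachineMemOperand flags instead, as the
// memcpy/memmove/memset lowering further down does with its MMOFlags value.]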
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); - SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl, - Node->getOperand(0), Node->getOperand(2), - MachinePointerInfo(VS), false, false, false, 0); + SDValue Tmp1 = + getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0), + Node->getOperand(2), MachinePointerInfo(VS)); return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), - MachinePointerInfo(VD), false, false, 0); + MachinePointerInfo(VD)); } -/// CreateStackTemporary - Create a stack temporary, suitable for holding the -/// specified value type. SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); @@ -1890,8 +1873,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } -/// CreateStackTemporary - Create a stack temporary suitable for holding -/// either of the specified value types. SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); Type *Ty1 = VT1.getTypeForEVT(*getContext()); @@ -1905,8 +1886,8 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } -SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, - SDValue N2, ISD::CondCode Cond, SDLoc dl) { +SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, + ISD::CondCode Cond, const SDLoc &dl) { // These setcc operations always fold. switch (Cond) { default: break; @@ -2469,6 +2450,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne = KnownOne.trunc(BitWidth); break; } + case ISD::BSWAP: { + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + KnownZero = KnownZero2.byteSwap(); + KnownOne = KnownOne2.byteSwap(); + break; + } case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -2506,12 +2493,36 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } -/// ComputeNumSignBits - Return the number of times the sign bit of the -/// register is replicated into the other bits. We know that at least 1 bit -/// is always equal to the sign bit (itself), but other cases can give us -/// information. For example, immediately after an "SRA X, 2", we know that -/// the top 3 bits are all equal to each other, so we return 3. -unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ +bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { + // A left-shift of a constant one will have exactly one bit set because + // shifting the bit off the end is undefined. + if (Val.getOpcode() == ISD::SHL) { + auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0)); + if (C && C->getAPIntValue() == 1) + return true; + } + + // Similarly, a logical right-shift of a constant sign-bit will have exactly + // one bit set. + if (Val.getOpcode() == ISD::SRL) { + auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0)); + if (C && C->getAPIntValue().isSignBit()) + return true; + } + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to computeKnownBits to catch other known cases. 
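// [Editor's worked example for the fallback just below: for an i8 value known
// to be exactly 0b00100000, KnownOne has a single set bit and KnownZero covers
// the remaining seven, so the popcounts are 1 and BitWidth - 1 and the value
// is provably a power of two. If even one bit is unknown, the KnownZero
// popcount falls short of BitWidth - 1 and the test conservatively fails.]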
+ EVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); + APInt KnownZero, KnownOne; + computeKnownBits(Val, KnownZero, KnownOne); + return (KnownZero.countPopulation() == BitWidth - 1) && + (KnownOne.countPopulation() == 1); +} + +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarType().getSizeInBits(); @@ -2761,11 +2772,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); } -/// isBaseWithConstantOffset - Return true if the specified operand is an -/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an -/// ISD::OR with a ConstantSDNode that is guaranteed to have the same -/// semantics as an ADD. This handles the equivalence: -/// X|Cst == X+Cst iff X&Cst = 0. bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || !isa<ConstantSDNode>(Op.getOperand(1))) @@ -2779,7 +2785,6 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { return true; } - bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { // If we're told that NaNs won't happen, assume they won't. if (getTarget().Options.NoNaNsFPMath) @@ -2834,28 +2839,30 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { return (AZero | BZero).isAllOnesValue(); } -static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops, +static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, + ArrayRef<SDValue> Ops, llvm::SelectionDAG &DAG) { if (Ops.size() == 1) return Ops[0]; // Concat of UNDEFs is UNDEF. - if (std::all_of(Ops.begin(), Ops.end(), - [](SDValue Op) { return Op.isUndef(); })) + if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) return DAG.getUNDEF(VT); - // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified - // to one big BUILD_VECTOR. - // FIXME: Add support for UNDEF and SCALAR_TO_VECTOR as well. - if (!std::all_of(Ops.begin(), Ops.end(), [](SDValue Op) { - return Op.getOpcode() == ISD::BUILD_VECTOR; - })) - return SDValue(); - + // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be + // simplified to one big BUILD_VECTOR. + // FIXME: Add support for SCALAR_TO_VECTOR as well. EVT SVT = VT.getScalarType(); SmallVector<SDValue, 16> Elts; - for (SDValue Op : Ops) - Elts.append(Op->op_begin(), Op->op_end()); + for (SDValue Op : Ops) { + EVT OpVT = Op.getValueType(); + if (Op.isUndef()) + Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); + else if (Op.getOpcode() == ISD::BUILD_VECTOR) + Elts.append(Op->op_begin(), Op->op_end()); + else + return SDValue(); + } // BUILD_VECTOR requires all inputs to be of the same type, find the // maximum type and extend them all. @@ -2871,25 +2878,24 @@ static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops, return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts); } -/// getNode - Gets or creates the specified node. -/// -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { +/// Gets or creates the specified node. 
+SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), None); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), getVTList(VT)); + auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), + getVTList(VT)); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, - EVT VT, SDValue Operand) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue Operand) { // Constant fold unary operations with an integer constant operand. Even // opaque constant will be folded, because the folding of unary operations // doesn't create new constants with different values. Nevertheless, the @@ -3054,7 +3060,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!"); - if (Operand.getOpcode() == ISD::UNDEF) + if (Operand.isUndef()) return getUNDEF(VT); break; case ISD::SIGN_EXTEND: @@ -3148,6 +3154,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); break; + case ISD::BITREVERSE: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid BITREVERSE!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; case ISD::BITCAST: // Basic sanity checking. assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() @@ -3192,20 +3204,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDNode *N; SDVTList VTs = getVTList(VT); + SDValue Ops[] = {Operand}; if (VT != MVT::Glue) { // Don't CSE flag producing nodes FoldingSetNodeID ID; - SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, Operand); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, Operand); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); } InsertNode(N); @@ -3250,8 +3262,8 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, return std::make_pair(APInt(1, 0), false); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, - const ConstantSDNode *Cst1, +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, + EVT VT, const ConstantSDNode *Cst1, const ConstantSDNode *Cst2) { if (Cst1->isOpaque() || Cst2->isOpaque()) return SDValue(); @@ -3263,8 +3275,29 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, return getConstant(Folded.first, DL, VT); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, - SDNode *Cst1, SDNode *Cst2) { +SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, + const GlobalAddressSDNode *GA, + const SDNode *N2) { + if (GA->getOpcode() != ISD::GlobalAddress) + return SDValue(); + if (!TLI->isOffsetFoldingLegal(GA)) + return 
SDValue(); + const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2); + if (!Cst2) + return SDValue(); + int64_t Offset = Cst2->getSExtValue(); + switch (Opcode) { + case ISD::ADD: break; + case ISD::SUB: Offset = -uint64_t(Offset); break; + default: return SDValue(); + } + return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT, + GA->getOffset() + uint64_t(Offset)); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, + EVT VT, SDNode *Cst1, + SDNode *Cst2) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. @@ -3274,21 +3307,20 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, // Handle the case of two scalars. if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { - if (SDValue Folded = - FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2)) { - if (!VT.isVector()) - return Folded; - SmallVector<SDValue, 4> Outputs; - // We may have a vector type but a scalar result. Create a splat. - Outputs.resize(VT.getVectorNumElements(), Outputs.back()); - // Build a big vector out of the scalar elements we generated. - return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); - } else { - return SDValue(); - } + SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2); + assert((!Folded || !VT.isVector()) && + "Can't fold vectors ops with scalar operands"); + return Folded; } } + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1)) + return FoldSymbolOffset(Opcode, VT, GA, Cst2); + if (isCommutativeBinOp(Opcode)) + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2)) + return FoldSymbolOffset(Opcode, VT, GA, Cst1); + // For vectors extract each constant element into Inputs so we can constant // fold them individually. BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); @@ -3329,11 +3361,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, Outputs.resize(VT.getVectorNumElements(), Outputs.back()); // Build a big vector out of the scalar elements we generated. 
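// [Editor's note on FoldSymbolOffset, added above: when
// TLI->isOffsetFoldingLegal(GA) holds it folds a constant into the global's
// offset, e.g. (add (GlobalAddress @g + 8), 16) -> (GlobalAddress @g + 24) and
// (sub (GlobalAddress @g + 8), 4) -> (GlobalAddress @g + 4). The negation goes
// through uint64_t deliberately; a minimal sketch of that wrap-safe
// arithmetic:]

#include <cstdint>

uint64_t foldOffset(uint64_t Base, int64_t Off, bool IsSub) {
  uint64_t U = static_cast<uint64_t>(Off);
  if (IsSub)
    U = -U;         // unsigned negation wraps; no signed overflow on INT64_MIN
  return Base + U;  // unsigned wraparound models two's-complement addition
}

// e.g. foldOffset(8, 4, /*IsSub=*/true) == 4, matching the SUB case above.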
- return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); + return getBuildVector(VT, SDLoc(), Outputs); } -SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, - EVT VT, +SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, + const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) { // If the opcode is a target-specific ISD node, there's nothing we can @@ -3355,8 +3387,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) { BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); - return (Op.getOpcode() == ISD::UNDEF) || - (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant()); + return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) || + (BV && BV->isConstant()); }; // All operands must be vector types with the same number of elements as @@ -3375,7 +3407,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, EVT LegalSVT = VT.getScalarType(); if (LegalSVT.isInteger()) { LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); - if (LegalSVT.bitsLT(SVT)) + if (LegalSVT.bitsLT(VT.getScalarType())) return SDValue(); } @@ -3414,20 +3446,18 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); // Scalar folding only succeeded if the result is a constant or UNDEF. - if (ScalarResult.getOpcode() != ISD::UNDEF && - ScalarResult.getOpcode() != ISD::Constant && + if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && ScalarResult.getOpcode() != ISD::ConstantFP) return SDValue(); ScalarResults.push_back(ScalarResult); } - assert(ScalarResults.size() == NumElts && - "Unexpected number of scalar results for BUILD_VECTOR"); - return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults); + return getBuildVector(VT, DL, ScalarResults); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, - SDValue N2, const SDNodeFlags *Flags) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue N1, SDValue N2, + const SDNodeFlags *Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); @@ -3617,14 +3647,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, }; if (N1C) { - APInt Val = N1C->getAPIntValue(); + const APInt &Val = N1C->getAPIntValue(); return SignExtendInReg(Val); } if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { SmallVector<SDValue, 8> Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue Op = N1.getOperand(i); - if (Op.getOpcode() == ISD::UNDEF) { + if (Op.isUndef()) { Ops.push_back(getUNDEF(VT.getScalarType())); continue; } @@ -3637,13 +3667,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, break; } if (Ops.size() == VT.getVectorNumElements()) - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + return getBuildVector(VT, DL, Ops); } break; } case ISD::EXTRACT_VECTOR_ELT: // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF @@ -3802,7 +3832,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Canonicalize an UNDEF to the RHS, even over a constant. 
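// [Editor's note: moving undef to the right means the undef-folding switch a
// few lines below only has to inspect N2. For commutative opcodes the operands
// are simply swapped (add undef, x -> add x, undef); for non-commutative ones
// the undef stays on the left and is handled separately. The special case
// called out below is xor: undef ^ undef folds to 0 rather than undef, since
// the idiom is commonly (mis)used to mean "zero".]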
- if (N1.getOpcode() == ISD::UNDEF) { + if (N1.isUndef()) { if (isCommutativeBinOp(Opcode)) { std::swap(N1, N2); } else { @@ -3831,10 +3861,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Fold a bunch of operators when the RHS is undef. - if (N2.getOpcode() == ISD::UNDEF) { + if (N2.isUndef()) { switch (Opcode) { case ISD::XOR: - if (N1.getOpcode() == ISD::UNDEF) + if (N1.isUndef()) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). return getConstant(0, DL, VT); @@ -3877,21 +3907,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Memoize this node if possible. - BinarySDNode *N; + SDNode *N; SDVTList VTs = getVTList(VT); if (VT != MVT::Glue) { SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { if (Flags) E->intersectFlagsWith(Flags); return SDValue(E, 0); } N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); - CSEMap.InsertNode(N, IP); } else { N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); @@ -3901,7 +3930,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. switch (Opcode) { @@ -3982,36 +4011,35 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, // Memoize node if it doesn't produce a flag. SDNode *N; SDVTList VTs = getVTList(VT); + SDValue Ops[] = {N1, N2, N3}; if (VT != MVT::Glue) { - SDValue Ops[] = { N1, N2, N3 }; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, N1, N2, N3); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, N1, N2, N3); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); } InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - SDValue N1, SDValue N2, SDValue N3, - SDValue N4) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VT, Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - SDValue N1, SDValue N2, SDValue N3, - SDValue N4, SDValue N5) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, SDValue N4, + SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; return getNode(Opcode, DL, VT, Ops); } @@ -4041,8 +4069,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { /// getMemsetValue - Vectorized representation of the memset value /// operand. 
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, - SDLoc dl) { - assert(Value.getOpcode() != ISD::UNDEF); + const SDLoc &dl) { + assert(!Value.isUndef()); unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { @@ -4069,13 +4097,9 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, } if (VT != Value.getValueType() && !VT.isInteger()) - Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value); - if (VT != Value.getValueType()) { - assert(VT.getVectorElementType() == Value.getValueType() && - "value type should be one vector element here"); - SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value); - Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps); - } + Value = DAG.getBitcast(VT.getScalarType(), Value); + if (VT != Value.getValueType()) + Value = DAG.getSplatBuildVector(VT, dl, Value); return Value; } @@ -4083,7 +4107,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// getMemsetStringVal - Similar to getMemsetValue. Except this is only /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. -static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, +static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. if (Str.empty()) { @@ -4124,19 +4148,16 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, return SDValue(nullptr, 0); } -/// getMemBasePlusOffset - Returns base and offset node for the -/// -static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, - SelectionDAG &DAG) { +SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset, + const SDLoc &DL) { EVT VT = Base.getValueType(); - return DAG.getNode(ISD::ADD, dl, - VT, Base, DAG.getConstant(Offset, dl, VT)); + return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT)); } /// isMemSrcFromString - Returns true if memcpy source is a string constant. /// static bool isMemSrcFromString(SDValue Src, StringRef &Str) { - unsigned SrcDelta = 0; + uint64_t SrcDelta = 0; GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) G = cast<GlobalAddressSDNode>(Src); @@ -4149,7 +4170,8 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) { if (!G) return false; - return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); + return getConstantStringInfo(G->getGlobal(), Str, + SrcDelta + G->getOffset(), false); } /// Determines the optimal series of memory ops to replace the memset / memcpy. 
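// [Editor's note: getMemsetValue above widens the i8 fill byte to the store
// type by splatting it across every byte. One standard way to compute that
// pattern (a sketch of the idea, not necessarily the exact DAG expansion used
// here) is shift-or doubling:]

#include <cassert>
#include <cstdint>

uint64_t splatByte(uint8_t Byte, unsigned NumBits) {
  uint64_t V = Byte;
  for (unsigned Shift = 8; Shift < NumBits; Shift *= 2)
    V |= V << Shift;  // each step doubles the number of filled bytes
  return V;
}

int main() {
  assert(splatByte(0xAB, 64) == 0xABABABABABABABABULL);  // 8 copies of 0xAB
  assert(splatByte(0x01, 32) == 0x01010101ULL);          // upper bits stay 0
  return 0;
}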
@@ -4163,6 +4185,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, bool ZeroMemset, bool MemcpyStrSrc, bool AllowOverlap, + unsigned DstAS, unsigned SrcAS, SelectionDAG &DAG, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && @@ -4179,10 +4202,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, DAG.getMachineFunction()); if (VT == MVT::Other) { - unsigned AS = 0; - if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(AS) || - TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) { - VT = TLI.getPointerTy(DAG.getDataLayout()); + if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) || + TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) { + VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS); } else { switch (DstAlign & 7) { case 0: VT = MVT::i64; break; @@ -4238,10 +4260,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, // FIXME: Only does this for 64-bit or more since we don't have proper // cost model for unaligned load / store. bool Fast; - unsigned AS = 0; if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size && - TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast) + TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) && Fast) VTSize = Size; else { VT = NewVT; @@ -4267,15 +4288,14 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { return MF.getFunction()->optForSize(); } -static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, uint64_t Size, - unsigned Align, bool isVol, - bool AlwaysInline, +static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + uint64_t Size, unsigned Align, + bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memcpy of undef to nop. - if (Src.getOpcode() == ISD::UNDEF) + if (Src.isUndef()) return Chain; // Expand memcpy to a series of load and store ops if the size operand falls @@ -4302,7 +4322,10 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), - false, false, CopyFromStr, true, DAG, TLI)) + false, false, CopyFromStr, true, + DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), + DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -4325,6 +4348,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, } } + MachineMemOperand::Flags MMOFlags = + isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; SmallVector<SDValue, 8> OutChains; unsigned NumMemOps = MemOps.size(); uint64_t SrcOff = 0, DstOff = 0; @@ -4351,9 +4376,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); if (Value.getNode()) Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, dl, DAG), - DstPtrInfo.getWithOffset(DstOff), isVol, - false, Align); + DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); } if (!Store.getNode()) { @@ -4365,13 +4389,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, - getMemBasePlusOffset(Src, SrcOff, dl, DAG), - SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, - false, MinAlign(SrcAlign, SrcOff)); - Store = DAG.getTruncStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, dl, DAG), - DstPtrInfo.getWithOffset(DstOff), VT, isVol, - false, Align); + DAG.getMemBasePlusOffset(Src, SrcOff, dl), + SrcPtrInfo.getWithOffset(SrcOff), VT, + MinAlign(SrcAlign, SrcOff), MMOFlags); + OutChains.push_back(Value.getValue(1)); + Store = DAG.getTruncStore( + Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); } OutChains.push_back(Store); SrcOff += VTSize; @@ -4382,15 +4406,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } -static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, uint64_t Size, - unsigned Align, bool isVol, - bool AlwaysInline, +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + uint64_t Size, unsigned Align, + bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memmove of undef to nop. - if (Src.getOpcode() == ISD::UNDEF) + if (Src.isUndef()) return Chain; // Expand memmove to a series of load and store ops if the size operand falls @@ -4411,7 +4434,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign, - false, false, false, false, DAG, TLI)) + false, false, false, false, + DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), + DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -4425,6 +4451,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, } } + MachineMemOperand::Flags MMOFlags = + isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; uint64_t SrcOff = 0, DstOff = 0; SmallVector<SDValue, 8> LoadValues; SmallVector<SDValue, 8> LoadChains; @@ -4435,10 +4463,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value; - Value = DAG.getLoad(VT, dl, Chain, - getMemBasePlusOffset(Src, SrcOff, dl, DAG), - SrcPtrInfo.getWithOffset(SrcOff), isVol, - false, false, SrcAlign); + Value = + DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, MMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -4451,8 +4478,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Store; Store = DAG.getStore(Chain, dl, LoadValues[i], - getMemBasePlusOffset(Dst, DstOff, dl, DAG), - DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); + DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); OutChains.push_back(Store); DstOff += VTSize; } @@ -4478,13 +4505,12 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, /// The function tries to replace 'llvm.memset' intrinsic with several store /// operations and value calculation code. This is usually profitable for small /// memory size. -static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, SDValue Dst, - SDValue Src, uint64_t Size, - unsigned Align, bool isVol, +static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + uint64_t Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. - if (Src.getOpcode() == ISD::UNDEF) + if (Src.isUndef()) return Chain; // Expand memset to a series of load/store ops if the size operand @@ -4502,7 +4528,9 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, - true, IsZeroVal, false, true, DAG, TLI)) + true, IsZeroVal, false, true, + DstPtrInfo.getAddrSpace(), ~0u, + DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -4548,10 +4576,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, Value = getMemsetValue(Src, VT, DAG, dl); } assert(Value.getValueType() == VT && "Value with wrong type."); - SDValue Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, dl, DAG), - DstPtrInfo.getWithOffset(DstOff), - isVol, false, Align); + SDValue Store = DAG.getStore( + Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Align, + isVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; Size -= VTSize; @@ -4570,10 +4598,10 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, } } -SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVol, bool AlwaysInline, - bool isTailCall, MachinePointerInfo DstPtrInfo, +SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, + SDValue Src, SDValue Size, unsigned Align, + bool isVol, bool AlwaysInline, bool isTailCall, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4632,10 +4660,10 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), - Type::getVoidTy(*getContext()), + Dst.getValueType().getTypeForEVT(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), TLI->getPointerTy(getDataLayout())), - std::move(Args), 0) + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -4643,9 +4671,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, return CallResult.second; } -SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVol, bool isTailCall, +SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, + SDValue Src, SDValue Size, unsigned Align, + bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4693,10 +4721,10 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), - Type::getVoidTy(*getContext()), + Dst.getValueType().getTypeForEVT(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), TLI->getPointerTy(getDataLayout())), - std::move(Args), 0) + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -4704,9 +4732,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, return CallResult.second; } -SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, - SDValue Src, SDValue Size, - unsigned Align, bool isVol, bool isTailCall, +SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, + SDValue Src, SDValue Size, unsigned Align, + bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4755,10 +4783,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, CLI.setDebugLoc(dl) .setChain(Chain) .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), - Type::getVoidTy(*getContext()), + Dst.getValueType().getTypeForEVT(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), TLI->getPointerTy(getDataLayout())), - std::move(Args), 0) + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -4766,7 +4794,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, return CallResult.second; } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTList, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, AtomicOrdering 
SuccessOrdering, @@ -4777,41 +4805,31 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - // Allocate the operands array for the node out of the BumpPtrAllocator, since - // SDNode doesn't have access to it. This memory will be "leaked" when - // the node is deallocated, but recovered when the allocator is released. - // If the number of operands is less than 5 we use AtomicSDNode's internal - // storage. - unsigned NumOps = Ops.size(); - SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps) - : nullptr; - - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTList, MemVT, - Ops.data(), DynOps, NumOps, MMO, - SuccessOrdering, FailureOrdering, - SynchScope); + auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), + VTList, MemVT, MMO, SuccessOrdering, + FailureOrdering, SynchScope); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTList, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO, - AtomicOrdering Ordering, + MachineMemOperand *MMO, AtomicOrdering Ordering, SynchronizationScope SynchScope) { return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering, Ordering, SynchScope); } SDValue SelectionDAG::getAtomicCmpSwap( - unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain, + unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { @@ -4826,10 +4844,8 @@ SDValue SelectionDAG::getAtomicCmpSwap( // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. 
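[Review note] The atomic hunk above is the template for a change repeated throughout this file: node allocation via newSDNode<T>(...) is now separated from operand wiring via createOperands(N, Ops), retiring the per-node DynOps/BumpPtrAllocator bookkeeping. Both helpers are private SelectionDAG members, so the sketch below only restates the new internal shape, not a public API:

    // New construction pattern inside SelectionDAG factory methods.
    auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(),
                                      dl.getDebugLoc(), VTList, MemVT, MMO,
                                      SuccessOrdering, FailureOrdering,
                                      SynchScope);
    createOperands(N, Ops); // operand storage comes from a shared recycler
    CSEMap.InsertNode(N, IP);
    InsertNode(N);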
- unsigned Flags = MachineMemOperand::MOVolatile; - Flags |= MachineMemOperand::MOLoad; - Flags |= MachineMemOperand::MOStore; - + auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad | + MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); @@ -4837,9 +4853,9 @@ SDValue SelectionDAG::getAtomicCmpSwap( SuccessOrdering, FailureOrdering, SynchScope); } -SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, - SDVTList VTs, SDValue Chain, SDValue Ptr, - SDValue Cmp, SDValue Swp, +SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, + EVT MemVT, SDVTList VTs, SDValue Chain, + SDValue Ptr, SDValue Cmp, SDValue Swp, MachineMemOperand *MMO, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, @@ -4853,11 +4869,9 @@ SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, SuccessOrdering, FailureOrdering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDValue Chain, - SDValue Ptr, SDValue Val, - const Value* PtrVal, - unsigned Alignment, +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + SDValue Chain, SDValue Ptr, SDValue Val, + const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, SynchronizationScope SynchScope) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4870,7 +4884,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, // chained as such. // FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. - unsigned Flags = MachineMemOperand::MOVolatile; + auto Flags = MachineMemOperand::MOVolatile; if (Opcode != ISD::ATOMIC_STORE) Flags |= MachineMemOperand::MOLoad; if (Opcode != ISD::ATOMIC_LOAD) @@ -4884,11 +4898,9 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDValue Chain, - SDValue Ptr, SDValue Val, - MachineMemOperand *MMO, - AtomicOrdering Ordering, +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + SDValue Chain, SDValue Ptr, SDValue Val, + MachineMemOperand *MMO, AtomicOrdering Ordering, SynchronizationScope SynchScope) { assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || @@ -4912,11 +4924,9 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - EVT VT, SDValue Chain, - SDValue Ptr, - MachineMemOperand *MMO, - AtomicOrdering Ordering, +SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, + EVT VT, SDValue Chain, SDValue Ptr, + MachineMemOperand *MMO, AtomicOrdering Ordering, SynchronizationScope SynchScope) { assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); @@ -4926,7 +4936,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, } /// getMergeValues - Create a MERGE_VALUES node from the given operands. 
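[Review note] The Flags rewrites above work because MachineMemOperand::Flags is now a typed bitmask enum rather than a loose set of unsigned constants, so bitwise-or stays in the enum type. A short sketch, with MF, PtrInfo, MemVT, and Alignment assumed from the enclosing function:

    // Volatile read-modify-write memory operand, e.g. for an atomic cmpxchg.
    auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
                 MachineMemOperand::MOStore;
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        PtrInfo, Flags, MemVT.getStoreSize(), Alignment);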
-SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl) { +SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { if (Ops.size() == 1) return Ops[0]; @@ -4937,17 +4947,15 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl) { return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops); } -SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, - ArrayRef<SDValue> Ops, - EVT MemVT, MachinePointerInfo PtrInfo, - unsigned Align, bool Vol, - bool ReadMem, bool WriteMem, unsigned Size) { +SDValue SelectionDAG::getMemIntrinsicNode( + unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, + bool ReadMem, bool WriteMem, unsigned Size) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - unsigned Flags = 0; + auto Flags = MachineMemOperand::MONone; if (WriteMem) Flags |= MachineMemOperand::MOStore; if (ReadMem) @@ -4962,10 +4970,10 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } -SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, - ArrayRef<SDValue> Ops, EVT MemVT, - MachineMemOperand *MMO) { +SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, + SDVTList VTList, + ArrayRef<SDValue> Ops, EVT MemVT, + MachineMemOperand *MMO) { assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || @@ -4982,19 +4990,20 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTList, Ops, - MemVT, MMO); - CSEMap.InsertNode(N, IP); + N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), + VTList, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), - dl.getDebugLoc(), VTList, Ops, - MemVT, MMO); + N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), + VTList, MemVT, MMO); + createOperands(N, Ops); } InsertNode(N); return SDValue(N, 0); @@ -5032,50 +5041,40 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, // If the 'Offset' value isn't a constant, we can't handle this. 
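[Review note] The load and store hunks that follow replace the isVolatile/isNonTemporal/isInvariant bool triple with a single MachineMemOperand::Flags argument after the alignment. A caller-side sketch of the migration, with VT, dl, Chain, Ptr, PtrInfo, and Align assumed:

    // Before: DAG.getLoad(VT, dl, Chain, Ptr, PtrInfo, /*isVolatile=*/false,
    //                     /*isNonTemporal=*/false, /*isInvariant=*/true, Align);
    // After: one self-describing flags parameter.
    SDValue L = DAG.getLoad(VT, dl, Chain, Ptr, PtrInfo, Align,
                            MachineMemOperand::MOInvariant);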
if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue()); - if (OffsetOp.getOpcode() == ISD::UNDEF) + if (OffsetOp.isUndef()) return InferPointerInfo(DAG, Ptr); return MachinePointerInfo(); } - -SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, SDLoc dl, SDValue Chain, - SDValue Ptr, SDValue Offset, - MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, bool isInvariant, - unsigned Alignment, const AAMDNodes &AAInfo, - const MDNode *Ranges) { +SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Offset, + MachinePointerInfo PtrInfo, EVT MemVT, + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); - unsigned Flags = MachineMemOperand::MOLoad; - if (isVolatile) - Flags |= MachineMemOperand::MOVolatile; - if (isNonTemporal) - Flags |= MachineMemOperand::MONonTemporal; - if (isInvariant) - Flags |= MachineMemOperand::MOInvariant; - + MMOFlags |= MachineMemOperand::MOLoad; + assert((MMOFlags & MachineMemOperand::MOStore) == 0); // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(*this, Ptr, Offset); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - AAInfo, Ranges); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } -SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, SDLoc dl, SDValue Chain, - SDValue Ptr, SDValue Offset, EVT MemVT, - MachineMemOperand *MMO) { +SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Offset, EVT MemVT, + MachineMemOperand *MMO) { if (VT == MemVT) { ExtType = ISD::NON_EXTLOAD; } else if (ExtType == ISD::NON_EXTLOAD) { @@ -5094,8 +5093,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, } bool Indexed = AM != ISD::UNINDEXED; - assert((Indexed || Offset.getOpcode() == ISD::UNDEF) && - "Unindexed load with an offset!"); + assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); SDVTList VTs = Indexed ? 
getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other); @@ -5108,100 +5106,90 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), - dl.getDebugLoc(), VTs, AM, ExtType, - MemVT, MMO); + auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + ExtType, MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, - SDValue Chain, SDValue Ptr, - MachinePointerInfo PtrInfo, - bool isVolatile, bool isNonTemporal, - bool isInvariant, unsigned Alignment, - const AAMDNodes &AAInfo, - const MDNode *Ranges) { +SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, MachinePointerInfo PtrInfo, + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, - AAInfo, Ranges); + PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges); } -SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, - SDValue Chain, SDValue Ptr, - MachineMemOperand *MMO) { +SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, MachineMemOperand *MMO) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, VT, MMO); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, - SDValue Chain, SDValue Ptr, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, + EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, - bool isInvariant, unsigned Alignment, + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); - return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - PtrInfo, MemVT, isVolatile, isNonTemporal, isInvariant, - Alignment, AAInfo); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo, + MemVT, Alignment, MMOFlags, AAInfo); } - -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, - SDValue Chain, SDValue Ptr, EVT MemVT, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, + EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, MemVT, MMO); } -SDValue -SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, - SDValue Offset, ISD::MemIndexedMode AM) { +SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); - assert(LD->getOffset().getOpcode() == ISD::UNDEF && - "Load is already a indexed load!"); + assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); + // Don't propagate the invariant flag. 
+ auto MMOFlags = + LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant; return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), - false, LD->getAlignment()); + LD->getMemoryVT(), LD->getAlignment(), MMOFlags); } -SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, +SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, - bool isVolatile, bool isNonTemporal, - unsigned Alignment, const AAMDNodes &AAInfo) { - assert(Chain.getValueType() == MVT::Other && - "Invalid chain type"); + unsigned Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); - unsigned Flags = MachineMemOperand::MOStore; - if (isVolatile) - Flags |= MachineMemOperand::MOVolatile; - if (isNonTemporal) - Flags |= MachineMemOperand::MONonTemporal; + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, - Val.getValueType().getStoreSize(), Alignment, - AAInfo); - + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } -SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, +SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -5216,46 +5204,42 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), - dl.getDebugLoc(), VTs, - ISD::UNINDEXED, false, VT, MMO); + auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + ISD::UNINDEXED, false, VT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, +SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, - EVT SVT,bool isVolatile, bool isNonTemporal, - unsigned Alignment, + EVT SVT, unsigned Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); - unsigned Flags = MachineMemOperand::MOStore; - if (isVolatile) - Flags |= MachineMemOperand::MOVolatile; - if (isNonTemporal) - Flags |= MachineMemOperand::MONonTemporal; + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); - 
MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment, - AAInfo); - + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } -SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, +SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, EVT SVT, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); @@ -5285,24 +5269,24 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), - dl.getDebugLoc(), VTs, - ISD::UNINDEXED, true, SVT, MMO); + auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + ISD::UNINDEXED, true, SVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue -SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, - SDValue Offset, ISD::MemIndexedMode AM) { +SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { StoreSDNode *ST = cast<StoreSDNode>(OrigStore); - assert(ST->getOffset().getOpcode() == ISD::UNDEF && - "Store is already a indexed store!"); + assert(ST->getOffset().isUndef() && "Store is already a indexed store!"); SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset }; FoldingSetNodeID ID; @@ -5311,23 +5295,23 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, ID.AddInteger(ST->getRawSubclassData()); ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), - dl.getDebugLoc(), VTs, AM, - ST->isTruncatingStore(), - ST->getMemoryVT(), - ST->getMemOperand()); + auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + ST->isTruncatingStore(), ST->getMemoryVT(), + ST->getMemOperand()); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue -SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, - SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, - MachineMemOperand *MMO, ISD::LoadExtType ExtTy) { +SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue Src0, + EVT MemVT, MachineMemOperand *MMO, + ISD::LoadExtType ExtTy) { SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; @@ -5340,21 +5324,23 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(), - dl.getDebugLoc(), Ops, 4, VTs, 
- ExtTy, MemVT, MMO); + auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + ExtTy, MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, - SDValue Ptr, SDValue Mask, EVT MemVT, - MachineMemOperand *MMO, bool isTrunc) { +SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, + SDValue Val, SDValue Ptr, SDValue Mask, + EVT MemVT, MachineMemOperand *MMO, + bool isTrunc) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); EVT VT = Val.getValueType(); @@ -5367,22 +5353,23 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, MMO->isNonTemporal(), MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(), - dl.getDebugLoc(), Ops, 4, - VTs, isTrunc, MemVT, MMO); + auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + isTrunc, MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue -SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, - ArrayRef<SDValue> Ops, - MachineMemOperand *MMO) { +SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + assert(Ops.size() == 5 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); @@ -5393,21 +5380,34 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - MaskedGatherSDNode *N = - new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), - Ops, VTs, VT, MMO); + + auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), + VTs, VT, MMO); + createOperands(N, Ops); + + assert(N->getValue().getValueType() == N->getValueType(0) && + "Incompatible type of the PassThru value in MaskedGatherSDNode"); + assert(N->getMask().getValueType().getVectorNumElements() == + N->getValueType(0).getVectorNumElements() && + "Vector width mismatch between mask and data"); + assert(N->getIndex().getValueType().getVectorNumElements() == + N->getValueType(0).getVectorNumElements() && + "Vector width mismatch between index and data"); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl, +SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { + assert(Ops.size() == 5 && "Incompatible number of operands"); + FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); @@ -5416,27 +5416,33 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl, MMO->isInvariant())); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + if (SDNode *E = 
FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = - new (NodeAllocator) MaskedScatterSDNode(dl.getIROrder(), dl.getDebugLoc(), - Ops, VTs, VT, MMO); + auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), + VTs, VT, MMO); + createOperands(N, Ops); + + assert(N->getMask().getValueType().getVectorNumElements() == + N->getValue().getValueType().getVectorNumElements() && + "Vector width mismatch between mask and data"); + assert(N->getIndex().getValueType().getVectorNumElements() == + N->getValue().getValueType().getVectorNumElements() && + "Vector width mismatch between index and data"); + CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, - SDValue Chain, SDValue Ptr, - SDValue SV, - unsigned Align) { +SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue SV, unsigned Align) { SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDUse> Ops) { switch (Ops.size()) { case 0: return getNode(Opcode, DL, VT); @@ -5452,7 +5458,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, return getNode(Opcode, DL, VT, NewOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { @@ -5498,27 +5504,28 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTs, Ops); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTs, Ops); + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); } InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { return getNode(Opcode, DL, getVTList(ResultTys), Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, ArrayRef<SDValue> Ops) { if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); @@ -5548,83 +5555,56 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, // Memoize the node unless it returns a flag. 
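[Review note] getMaskedGather and getMaskedScatter now assert a fixed five-operand layout and matching vector widths instead of silently accepting malformed nodes. A sketch of a well-formed gather, with every value caller-supplied and the operand order inferred from the accessors used in the new asserts:

    // Operands are { Chain, PassThru, Mask, BasePtr, Index }; PassThru must
    // have the result type, and Mask/Index must match its element count.
    SDValue Ops[] = { Chain, PassThru, Mask, BasePtr, Index };
    SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), MemVT,
                                         dl, Ops, MMO);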
SDNode *N; - unsigned NumOps = Ops.size(); if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); - if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0]); - } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1]); - } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1], Ops[2]); - } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTList, Ops); - } + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); + createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { - if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0]); - } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1]); - } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTList, Ops[0], - Ops[1], Ops[2]); - } else { - N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTList, Ops); - } + N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); + createOperands(N, Ops); } InsertNode(N); return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, + SDVTList VTList) { return getNode(Opcode, DL, VTList, None); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1) { SDValue Ops[] = { N1 }; return getNode(Opcode, DL, VTList, Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2) { SDValue Ops[] = { N1, N2 }; return getNode(Opcode, DL, VTList, Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3) { SDValue Ops[] = { N1, N2, N3 }; return getNode(Opcode, DL, VTList, Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, - SDValue N1, SDValue N2, SDValue N3, - SDValue N4) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VTList, Ops); } -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, - SDValue N1, SDValue N2, SDValue N3, - SDValue N4, SDValue N5) { +SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, SDValue N4, + SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; return getNode(Opcode, DL, VTList, Ops); } @@ -5932,10 +5912,14 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs,ArrayRef<SDValue> Ops) { - N = MorphNodeTo(N, ~MachineOpc, VTs, Ops); + SDNode 
*New = MorphNodeTo(N, ~MachineOpc, VTs, Ops); // Reset the NodeID to -1. - N->setNodeId(-1); - return N; + New->setNodeId(-1); + if (New != N) { + ReplaceAllUsesWith(N, New); + RemoveDeadNode(N); + } + return New; } /// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away /// the debug location of the merged node, since the node now stands for /// several source lines and keeping one of them would mislead the debugger; /// at -O0 there is a higher probability of having other instructions /// associated with that line. /// /// For IROrder, we keep the smaller of the two -SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { +SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &OLoc) { DebugLoc NLoc = N->getDebugLoc(); if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); @@ -5973,13 +5957,12 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { /// deleting things. SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef<SDValue> Ops) { - unsigned NumOps = Ops.size(); // If an identical node already exists, use it. void *IP = nullptr; if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops); - if (SDNode *ON = FindNodeOrInsertPos(ID, N->getDebugLoc(), IP)) + if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP)) return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } @@ -6002,36 +5985,13 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, DeadNodeSet.insert(Used); } - if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) { - // Initialize the memory references information. + // For MachineNode, initialize the memory references information. + if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) MN->setMemRefs(nullptr, nullptr); - // If NumOps is larger than the # of operands we can have in a - // MachineSDNode, reallocate the operand list. - if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) { - if (MN->OperandsNeedDelete) - delete[] MN->OperandList; - if (NumOps > array_lengthof(MN->LocalOperands)) - // We're creating a final node that will live unmorphed for the - // remainder of the current SelectionDAG iteration, so we can allocate - // the operands directly out of a pool with no recycling metadata. - MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), - Ops.data(), NumOps); - else - MN->InitOperands(MN->LocalOperands, Ops.data(), NumOps); - MN->OperandsNeedDelete = false; - } else - MN->InitOperands(MN->OperandList, Ops.data(), NumOps); - } else { - // If NumOps is larger than the # of operands we currently have, reallocate - // the operand list. - if (NumOps > N->NumOperands) { - if (N->OperandsNeedDelete) - delete[] N->OperandList; - N->InitOperands(new SDUse[NumOps], Ops.data(), NumOps); - N->OperandsNeedDelete = true; - } else - N->InitOperands(N->OperandList, Ops.data(), NumOps); - } + + // Swap for an appropriately sized array from the recycler. + removeOperands(N); + createOperands(N, Ops); // Delete any nodes that are still dead after adding the uses for the // new operands. @@ -6055,155 +6015,133 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, /// Note that getMachineNode returns the resultant node. If there is already a /// node of the specified opcode and operands, it returns that node instead of /// the current one.
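[Review note] The SelectNodeTo hunk above tightens an easy-to-miss contract: when MorphNodeTo CSEs N into an existing node, SelectNodeTo now rewires N's uses and deletes N rather than returning the stale node. Target ISel code should therefore treat the return value as the only live node:

    // N may already be dead after this call; only New is safe to use.
    SDNode *New = CurDAG->SelectNodeTo(N, TargetOpc, VTs, Ops);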
-MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT) { SDVTList VTs = getVTList(VT); return getMachineNode(Opcode, dl, VTs, None); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, - SDValue Op1, SDValue Op2) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, - SDValue Op1, SDValue Op2, SDValue Op3) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, SDValue Op1, SDValue Op2, + SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, - ArrayRef<SDValue> Ops) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT); return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return getMachineNode(Opcode, dl, VTs, None); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, SDValue Op1) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, SDValue Op1, - SDValue Op2, SDValue Op3) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, - ArrayRef<SDValue> Ops) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2); return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, EVT VT3, - SDValue Op1, SDValue Op2) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2) { SDVTList VTs = 
getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, EVT VT3, - SDValue Op1, SDValue Op2, SDValue Op3) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - EVT VT1, EVT VT2, EVT VT3, - ArrayRef<SDValue> Ops) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, - EVT VT2, EVT VT3, EVT VT4, - ArrayRef<SDValue> Ops) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, EVT VT3, EVT VT4, + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, - ArrayRef<EVT> ResultTys, - ArrayRef<SDValue> Ops) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + ArrayRef<EVT> ResultTys, + ArrayRef<SDValue> Ops) { SDVTList VTs = getVTList(ResultTys); return getMachineNode(Opcode, dl, VTs, Ops); } -MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, - ArrayRef<SDValue> OpsArray) { +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, + SDVTList VTs, + ArrayRef<SDValue> Ops) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; void *IP = nullptr; - const SDValue *Ops = OpsArray.data(); - unsigned NumOps = OpsArray.size(); if (DoCSE) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ~Opcode, VTs, OpsArray); + AddNodeIDNode(ID, ~Opcode, VTs, Ops); IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); } } // Allocate a new MachineSDNode. - N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs); - - // Initialize the operands list. - if (NumOps > array_lengthof(N->LocalOperands)) - // We're creating a final node that will live unmorphed for the - // remainder of the current SelectionDAG iteration, so we can allocate - // the operands directly out of a pool with no recycling metadata. - N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), - Ops, NumOps); - else - N->InitOperands(N->LocalOperands, Ops, NumOps); - N->OperandsNeedDelete = false; + N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); if (DoCSE) CSEMap.InsertNode(N, IP); @@ -6214,9 +6152,8 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, /// getTargetExtractSubreg - A convenience function for creating /// TargetOpcode::EXTRACT_SUBREG nodes. 
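[Review note] For context on the EXTRACT_SUBREG convenience helper whose body follows, a typical ISel use looks like the sketch below; the sub-register index is target-specific, with X86's sub_32bit shown purely as an assumed example:

    // Narrow a 64-bit value to its low 32 bits as an i32 node.
    SDValue Lo = CurDAG->getTargetExtractSubreg(X86::sub_32bit, DL, MVT::i32,
                                                Val64);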
-SDValue -SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, - SDValue Operand) { +SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, + SDValue Operand) { SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, Operand, SRIdxVal); @@ -6225,9 +6162,8 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, /// getTargetInsertSubreg - A convenience function for creating /// TargetOpcode::INSERT_SUBREG nodes. -SDValue -SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, - SDValue Operand, SDValue Subreg) { +SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, + SDValue Operand, SDValue Subreg) { SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, Operand, Subreg, SRIdxVal); @@ -6243,7 +6179,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) { + if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { if (Flags) E->intersectFlagsWith(Flags); return E; @@ -6257,7 +6193,7 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, /// SDNode SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool IsIndirect, uint64_t Off, - DebugLoc DL, unsigned O) { + const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) @@ -6267,7 +6203,7 @@ SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, /// Constant SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t Off, - DebugLoc DL, unsigned O) { + const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O); @@ -6276,7 +6212,8 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, /// FrameIndex SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t Off, - DebugLoc DL, unsigned O) { + const DebugLoc &DL, + unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O); @@ -6348,6 +6285,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { AddModifiedNodeToCSEMaps(User); } + // Preserve Debug Values + TransferDbgValues(FromN, To); + // If we just RAUW'd the root, take note. if (FromN == getRoot()) setRoot(To); @@ -6371,6 +6311,13 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { if (From == To) return; + // Preserve Debug Info. Only do this if there's a use. + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + if (From->hasAnyUseOfValue(i)) { + assert((i < To->getNumValues()) && "Invalid To location"); + TransferDbgValues(SDValue(From, i), SDValue(To, i)); + } + // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. 
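[Review note] With the TransferDbgValues calls added above, each flavor of ReplaceAllUsesWith now migrates attached dbg_value records on its own, so a DAG combine that rewrites a value no longer needs a manual transfer step; with From and To assumed:

    // Debug values that referred to From now follow To automatically.
    DAG.ReplaceAllUsesWith(From, To);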
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); @@ -6410,6 +6357,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { if (From->getNumValues() == 1) // Handle the simple case efficiently. return ReplaceAllUsesWith(SDValue(From, 0), To[0]); + // Preserve Debug Info. + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + TransferDbgValues(SDValue(From, i), *To); + // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); @@ -6454,6 +6405,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ return; } + // Preserve Debug Info. + TransferDbgValues(From, To); + // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From.getNode()->use_begin(), @@ -6528,6 +6482,8 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, if (Num == 1) return ReplaceAllUsesOfValueWith(*From, *To); + TransferDbgValues(*From, *To); + // Read up all the uses and make records of them. This helps // processing new uses that are introduced during the // replacement process. @@ -6628,7 +6584,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { if (Degree == 0) { // All of P's operands are sorted, so P may sorted now. P->setNodeId(DAGSize++); - if (P != SortedPos) + if (P->getIterator() != SortedPos) SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P)); assert(SortedPos != AllNodes.end() && "Overran node list"); ++SortedPos; @@ -6637,7 +6593,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { P->setNodeId(Degree); } } - if (&Node == SortedPos) { + if (Node.getIterator() == SortedPos) { #ifndef NDEBUG allnodes_iterator I(N); SDNode *S = &*++I; @@ -6676,7 +6632,7 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { DbgInfo->add(DB, SD, isParameter); } -/// TransferDbgValues - Transfer SDDbgValues. +/// TransferDbgValues - Transfer SDDbgValues. Called in replace nodes. void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { if (From == To || !From.getNode()->getHasDebugValue()) return; @@ -6687,17 +6643,22 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end(); I != E; ++I) { SDDbgValue *Dbg = *I; - if (Dbg->getKind() == SDDbgValue::SDNODE) { + // Only add Dbgvalues attached to same ResNo. 
+ if (Dbg->getKind() == SDDbgValue::SDNODE && + Dbg->getSDNode() == From.getNode() && + Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) { + assert(FromNode != ToNode && + "Should not transfer Debug Values intranode"); SDDbgValue *Clone = getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode, To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(), Dbg->getDebugLoc(), Dbg->getOrder()); ClonedDVs.push_back(Clone); + Dbg->setIsInvalidated(); } } - for (SmallVectorImpl<SDDbgValue *>::iterator I = ClonedDVs.begin(), - E = ClonedDVs.end(); I != E; ++I) - AddDbgValue(*I, ToNode, false); + for (SDDbgValue *I : ClonedDVs) + AddDbgValue(I, ToNode, false); } //===----------------------------------------------------------------------===// @@ -6724,26 +6685,31 @@ bool llvm::isOneConstant(SDValue V) { return Const != nullptr && Const->isOne(); } +bool llvm::isBitwiseNot(SDValue V) { + return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1)); +} + HandleSDNode::~HandleSDNode() { DropOperands(); } GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, - DebugLoc DL, const GlobalValue *GA, - EVT VT, int64_t o, unsigned char TF) - : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { + const DebugLoc &DL, + const GlobalValue *GA, EVT VT, + int64_t o, unsigned char TF) + : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } -AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT, - SDValue X, unsigned SrcAS, +AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, + EVT VT, unsigned SrcAS, unsigned DestAS) - : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X), - SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} + : SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)), + SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} -MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - EVT memvt, MachineMemOperand *mmo) - : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { +MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, + SDVTList VTs, EVT memvt, MachineMemOperand *mmo) + : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); @@ -6755,16 +6721,6 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); } -MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - ArrayRef<SDValue> Ops, EVT memvt, MachineMemOperand *mmo) - : SDNode(Opc, Order, dl, VTs, Ops), - MemoryVT(memvt), MMO(mmo) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal(), MMO->isInvariant()); - assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); - assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); -} - /// Profile - Gather unique data for the node. /// void SDNode::Profile(FoldingSetNodeID &ID) const { @@ -6894,44 +6850,13 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, return false; } -/// hasPredecessor - Return true if N is a predecessor of this node. -/// N is either an operand of this node, or can be reached by recursively -/// traversing up the operands. -/// NOTE: This is an expensive method. Use it carefully. 
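TransferDbgValues above now clones each SDDbgValue that refers to the replaced result onto the replacement node and marks the original invalidated, instead of leaving a dangling reference behind. A toy version of that clone-and-invalidate pattern (types simplified, not the SDDbgValue API):

#include <vector>

struct DbgValue {
  int Node;           // id of the node result this value describes
  unsigned ResNo;
  bool Invalidated = false;
};

// Move every debug value describing (FromNode, FromRes) over to
// (ToNode, ToRes): clone first, then invalidate the original, so
// iteration over the existing list stays well-defined.
void transferDbgValues(std::vector<DbgValue> &DVs, int FromNode,
                       unsigned FromRes, int ToNode, unsigned ToRes) {
  std::vector<DbgValue> Clones;
  for (DbgValue &D : DVs)
    if (D.Node == FromNode && D.ResNo == FromRes && !D.Invalidated) {
      Clones.push_back({ToNode, ToRes, false});
      D.Invalidated = true;  // original no longer describes a live node
    }
  DVs.insert(DVs.end(), Clones.begin(), Clones.end());
}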
bool SDNode::hasPredecessor(const SDNode *N) const { SmallPtrSet<const SDNode *, 32> Visited; SmallVector<const SDNode *, 16> Worklist; + Worklist.push_back(this); return hasPredecessorHelper(N, Visited, Worklist); } -bool -SDNode::hasPredecessorHelper(const SDNode *N, - SmallPtrSetImpl<const SDNode *> &Visited, - SmallVectorImpl<const SDNode *> &Worklist) const { - if (Visited.empty()) { - Worklist.push_back(this); - } else { - // Take a look in the visited set. If we've already encountered this node - // we needn't search further. - if (Visited.count(N)) - return true; - } - - // Haven't visited N yet. Continue the search. - while (!Worklist.empty()) { - const SDNode *M = Worklist.pop_back_val(); - for (const SDValue &OpV : M->op_values()) { - SDNode *Op = OpV.getNode(); - if (Visited.insert(Op).second) - Worklist.push_back(Op); - if (Op == N) - return true; - } - } - - return false; -} - uint64_t SDNode::getConstantOperandVal(unsigned Num) const { assert(Num < NumOperands && "Invalid child # of SDNode!"); return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); @@ -7018,12 +6943,14 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars); } - -/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a -/// location that is 'Dist' units away from the location that the 'Base' load -/// is loading from. -bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, - unsigned Bytes, int Dist) const { +bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, + LoadSDNode *Base, + unsigned Bytes, + int Dist) const { + if (LD->isVolatile() || Base->isVolatile()) + return false; + if (LD->isIndexed() || Base->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getValueType(0); @@ -7204,7 +7131,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, SDValue OpVal = getOperand(i); unsigned BitPos = j * EltBitSize; - if (OpVal.getOpcode() == ISD::UNDEF) + if (OpVal.isUndef()) SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize). @@ -7250,7 +7177,7 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { SDValue Splatted; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { SDValue Op = getOperand(i); - if (Op.getOpcode() == ISD::UNDEF) { + if (Op.isUndef()) { if (UndefElements) (*UndefElements)[i] = true; } else if (!Splatted) { @@ -7261,7 +7188,7 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { } if (!Splatted) { - assert(getOperand(0).getOpcode() == ISD::UNDEF && + assert(getOperand(0).isUndef() && "Can only have a splat without a constant for all undefs."); return getOperand(0); } @@ -7286,7 +7213,7 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { bool IsExact; APSInt IntVal(BitWidth); - APFloat APF = CN->getValueAPF(); + const APFloat &APF = CN->getValueAPF(); if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != APFloat::opOK || !IsExact) @@ -7322,6 +7249,22 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } +// \brief Returns the SDNode if it is a constant integer BuildVector +// or constant integer. 
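hasPredecessor now just seeds the worklist with this and defers to hasPredecessorHelper; the search behind it is a plain iterative DFS along operand edges with a visited set, expensive because it may touch the whole DAG. The same traversal, self-contained:

#include <unordered_set>
#include <vector>

struct Node { std::vector<const Node *> Ops; };

// Returns true if N is reachable from Root by walking operand edges,
// i.e. the worklist traversal hasPredecessorHelper performs.
bool hasPredecessor(const Node *Root, const Node *N) {
  std::unordered_set<const Node *> Visited;
  std::vector<const Node *> Worklist{Root};
  while (!Worklist.empty()) {
    const Node *M = Worklist.back();
    Worklist.pop_back();
    for (const Node *Op : M->Ops) {
      if (Op == N)
        return true;
      if (Visited.insert(Op).second)  // only queue unseen nodes
        Worklist.push_back(Op);
    }
  }
  return false;
}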
+SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { + if (isa<ConstantSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) + return N.getNode(); + // Treat a GlobalAddress supporting constant offset folding as a + // constant integer. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N)) + if (GA->getOpcode() == ISD::GlobalAddress && + TLI->isOffsetFoldingLegal(GA)) + return GA; + return nullptr; +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, @@ -7353,9 +7296,9 @@ void llvm::checkForCycles(const llvm::SDNode *N, bool force) { #ifndef NDEBUG bool check = force; -#ifdef XDEBUG +#ifdef EXPENSIVE_CHECKS check = true; -#endif // XDEBUG +#endif // EXPENSIVE_CHECKS if (check) { assert(N && "Checking nonexistent SDNode"); SmallPtrSet<const SDNode*, 32> visited; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 45ae39a..e1fc37d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -20,9 +20,11 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -34,6 +36,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallingConv.h" @@ -42,6 +45,7 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" @@ -61,7 +65,6 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <utility> @@ -84,6 +87,19 @@ static cl::opt<bool> EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, cl::desc("Enable fast-math-flags for DAG nodes")); +/// Minimum jump table density for normal functions. +static cl::opt<unsigned> +JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, + cl::desc("Minimum density for building a jump table in " + "a normal function")); + +/// Minimum jump table density for -Os or -Oz functions. +static cl::opt<unsigned> +OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden, + cl::desc("Minimum density for building a jump table in " + "an optsize function")); + + // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. 
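The two new cl::opts above gate jump-table formation on case density, in percent: 10 normally, 40 under -Os/-Oz, the idea being that a sparse table wastes more space than the branch tree it replaces. The density test used by the switch clustering code is, roughly, NumCases * 100 >= Range * MinDensity; a worked check under that assumption:

#include <cstdint>
#include <iostream>

// Sketch of the density heuristic: a jump table over [Low, High] has
// Range = High - Low + 1 slots; it is dense enough when the populated
// cases cover at least MinDensity percent of those slots.
bool denseEnough(uint64_t NumCases, uint64_t Range, unsigned MinDensity) {
  return NumCases * 100 >= Range * MinDensity;
}

int main() {
  // 12 cases spread over the value range 0..99: 12% density.
  std::cout << denseEnough(12, 100, 10) << '\n';  // 1: accepted at -O2
  std::cout << denseEnough(12, 100, 40) << '\n';  // 0: rejected at -Os
}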
It is difficult to @@ -94,26 +110,25 @@ EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, // MaxParallelChains default is arbitrarily high to avoid affecting // optimization, but could be lowered to improve compile time. Any ld-ld-st-st // sequence over this should have been converted to llvm.memcpy by the -// frontend. It easy to induce this behavior with .ll code such as: +// frontend. It is easy to induce this behavior with .ll code such as: // %buffer = alloca [4096 x i8] // %data = load [4096 x i8]* %argPtr // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; -static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type -/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// larger than ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). -static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, - const SDValue *Parts, - unsigned NumParts, MVT PartVT, EVT ValueVT, - const Value *V, - ISD::NodeType AssertOp = ISD::DELETED_NODE) { +static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, + const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, const Value *V, + Optional<ISD::NodeType> AssertOp = None) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V); @@ -193,6 +208,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, } // There is now one part, held in Val. Correct it to match ValueVT. + // PartEVT is the type of the register class that holds the value. + // ValueVT is the type of the inline asm operation. EVT PartEVT = Val.getValueType(); if (PartEVT == ValueVT) @@ -206,13 +223,18 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); } + // Handle types that have the same size. + if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + // Handle types with different sizes. if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. - if (AssertOp != ISD::DELETED_NODE) - Val = DAG.getNode(AssertOp, DL, PartEVT, Val, + if (AssertOp.hasValue()) + Val = DAG.getNode(*AssertOp, DL, PartEVT, Val, DAG.getValueType(ValueVT)); return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } @@ -229,9 +251,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } - if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) - return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - llvm_unreachable("Unknown mismatch!"); } @@ -251,10 +270,10 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, /// getCopyFromPartsVector - Create a value that contains the specified legal /// parts combined into the value they represent. 
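getCopyFromParts reassembles one value from the registers legalization split it across; for integers that is shift-and-or reassembly, and the now-Optional AssertOp records whether the high bits of a too-wide result are known zero- or sign-extended. The two-part integer case on host types, as a minimal sketch:

#include <cassert>
#include <cstdint>

// Rebuild an i64 from two i32 parts, low part first: the integer case
// of getCopyFromParts with NumParts == 2 on a little-endian target.
uint64_t copyFromParts(uint32_t Lo, uint32_t Hi) {
  return (uint64_t(Hi) << 32) | uint64_t(Lo);
}

int main() {
  uint64_t V = 0x0123456789abcdefULL;
  assert(copyFromParts(uint32_t(V), uint32_t(V >> 32)) == V);
}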
If the parts combine to a -/// type larger then ValueVT then AssertOp can be used to specify whether the +/// type larger than ValueVT then AssertOp can be used to specify whether the /// extra bits are known to be zero (ISD::AssertZext) or sign extended from /// ValueVT (ISD::AssertSext). -static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); @@ -353,16 +372,16 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } -static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, +static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. -static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, - SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V, +static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, + SDValue *Parts, unsigned NumParts, MVT PartVT, + const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); @@ -427,9 +446,11 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - if (PartEVT != ValueVT) + if (PartEVT != ValueVT) { diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, "scalar-to-vector conversion failed"); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } Parts[0] = Val; return; @@ -489,7 +510,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, /// getCopyToPartsVector - Create a series of nodes that contain the specified /// value split into legal parts. -static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, +static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); @@ -618,9 +639,8 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, /// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V) const { + const SDLoc &dl, SDValue &Chain, + SDValue *Flag, const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. if (ValueVTs.empty()) return SDValue(); @@ -676,25 +696,33 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // now, just use the tightest assertzext/assertsext possible. 
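getCopyToParts is the inverse direction: extend the value to the combined part width according to ExtendKind, then slice it into NumParts pieces. The matching two-part split of the sketch above:

#include <cassert>
#include <cstdint>

// Split an i64 into two i32 parts, the inverse of copyFromParts.
void copyToParts(uint64_t V, uint32_t Parts[2]) {
  Parts[0] = uint32_t(V);        // low part first, as on little-endian
  Parts[1] = uint32_t(V >> 32);
}

int main() {
  uint32_t P[2];
  copyToParts(0x1122334455667788ULL, P);
  assert(P[0] == 0x55667788u && P[1] == 0x11223344u);
}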
bool isSExt = true; EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) - isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 - else if (NumZeroBits >= RegSize-1) - isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 - else if (NumSignBits > RegSize-8) - isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 - else if (NumZeroBits >= RegSize-8) - isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 - else if (NumSignBits > RegSize-16) - isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 - else if (NumZeroBits >= RegSize-16) - isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 - else if (NumSignBits > RegSize-32) - isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 - else if (NumZeroBits >= RegSize-32) - isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - else + if (NumSignBits == RegSize) { + isSExt = true; // ASSERT SEXT 1 + FromVT = MVT::i1; + } else if (NumZeroBits >= RegSize - 1) { + isSExt = false; // ASSERT ZEXT 1 + FromVT = MVT::i1; + } else if (NumSignBits > RegSize - 8) { + isSExt = true; // ASSERT SEXT 8 + FromVT = MVT::i8; + } else if (NumZeroBits >= RegSize - 8) { + isSExt = false; // ASSERT ZEXT 8 + FromVT = MVT::i8; + } else if (NumSignBits > RegSize - 16) { + isSExt = true; // ASSERT SEXT 16 + FromVT = MVT::i16; + } else if (NumZeroBits >= RegSize - 16) { + isSExt = false; // ASSERT ZEXT 16 + FromVT = MVT::i16; + } else if (NumSignBits > RegSize - 32) { + isSExt = true; // ASSERT SEXT 32 + FromVT = MVT::i32; + } else if (NumZeroBits >= RegSize - 32) { + isSExt = false; // ASSERT ZEXT 32 + FromVT = MVT::i32; + } else { continue; - + } // Add an assertion node. assert(FromVT != MVT::Other); Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, @@ -714,8 +742,9 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. -void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, - SDValue &Chain, SDValue *Flag, const Value *V, +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, + const SDLoc &dl, SDValue &Chain, SDValue *Flag, + const Value *V, ISD::NodeType PreferredExtendType) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); ISD::NodeType ExtendKind = PreferredExtendType; @@ -770,7 +799,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, /// operand list. This adds the code marker and includes the number of /// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, SDLoc dl, + unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -902,10 +931,48 @@ SDValue SelectionDAGBuilder::getControlRoot() { return Root; } +/// Copy swift error to the final virtual register at end of a basic block, as +/// specified by SwiftErrorWorklist, if necessary. +static void copySwiftErrorsToFinalVRegs(SelectionDAGBuilder &SDB) { + const TargetLowering &TLI = SDB.DAG.getTargetLoweringInfo(); + if (!TLI.supportSwiftError()) + return; + + if (!SDB.FuncInfo.SwiftErrorWorklist.count(SDB.FuncInfo.MBB)) + return; + + // Go through entries in SwiftErrorWorklist, and create copy as necessary. 
+ FunctionLoweringInfo::SwiftErrorVRegs &WorklistEntry = + SDB.FuncInfo.SwiftErrorWorklist[SDB.FuncInfo.MBB]; + FunctionLoweringInfo::SwiftErrorVRegs &MapEntry = + SDB.FuncInfo.SwiftErrorMap[SDB.FuncInfo.MBB]; + for (unsigned I = 0, E = WorklistEntry.size(); I < E; I++) { + unsigned WorkReg = WorklistEntry[I]; + + // Find the swifterror virtual register for the value in SwiftErrorMap. + unsigned MapReg = MapEntry[I]; + assert(TargetRegisterInfo::isVirtualRegister(MapReg) && + "Entries in SwiftErrorMap should be virtual registers"); + + if (WorkReg == MapReg) + continue; + + // Create copy from SwiftErrorMap to SwiftWorklist. + auto &DL = SDB.DAG.getDataLayout(); + SDValue CopyNode = SDB.DAG.getCopyToReg( + SDB.getRoot(), SDB.getCurSDLoc(), WorkReg, + SDB.DAG.getRegister(MapReg, EVT(TLI.getPointerTy(DL)))); + MapEntry[I] = WorkReg; + SDB.DAG.setRoot(CopyNode); + } +} + void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. - if (isa<TerminatorInst>(&I)) + if (isa<TerminatorInst>(&I)) { + copySwiftErrorsToFinalVRegs(*this); HandlePHINodesInSuccessorBlocks(I.getParent()); + } ++SDNodeOrder; @@ -992,10 +1059,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { // If there's a virtual register allocated and initialized for this // value, use it. - SDValue copyFromReg = getCopyFromRegs(V, V->getType()); - if (copyFromReg.getNode()) { + if (SDValue copyFromReg = getCopyFromRegs(V, V->getType())) return copyFromReg; - } // Otherwise create a new SDValue and remember it. SDValue Val = getValueImpl(V); @@ -1206,7 +1271,7 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { // This will be used by the FuncletLayout pass to determine how to order the // BB's. // A 'catchret' returns to the outer scope's color. - Value *ParentPad = I.getParentPad(); + Value *ParentPad = I.getCatchSwitchParentPad(); const BasicBlock *SuccessorColor; if (isa<ConstantTokenNone>(ParentPad)) SuccessorColor = &FuncInfo.Fn->getEntryBlock(); @@ -1314,6 +1379,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<ISD::OutputArg, 8> Outs; SmallVector<SDValue, 8> OutVals; + // Calls to @llvm.experimental.deoptimize don't generate a return value, so + // lower + // + // %val = call <ty> @llvm.experimental.deoptimize() + // ret <ty> %val + // + // differently. + if (I.getParent()->getTerminatingDeoptimizeCall()) { + LowerDeoptimizingReturn(); + return; + } + if (!FuncInfo.CanLowerReturn) { unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); @@ -1346,11 +1423,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { DAG.getIntPtrConstant(Offsets[i], getCurSDLoc()), &Flags); - Chains[i] = - DAG.getStore(Chain, getCurSDLoc(), - SDValue(RetOp.getNode(), RetOp.getResNo() + i), - // FIXME: better loc info would be nice. - Add, MachinePointerInfo(), false, false, 0); + Chains[i] = DAG.getStore(Chain, getCurSDLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + i), + // FIXME: better loc info would be nice. 
+ Add, MachinePointerInfo()); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), @@ -1380,7 +1456,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { EVT VT = ValueVTs[j]; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind); + VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); unsigned NumParts = TLI.getNumRegisters(Context, VT); MVT PartVT = TLI.getRegisterType(Context, VT); @@ -1409,6 +1485,23 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { } } + // Push in swifterror virtual register as the last element of Outs. This makes + // sure swifterror virtual register will be returned in the swifterror + // physical register. + const Function *F = I.getParent()->getParent(); + if (TLI.supportSwiftError() && + F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + Flags.setSwiftError(); + Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, + EVT(TLI.getPointerTy(DL)) /*argvt*/, + true /*isfixed*/, 1 /*origidx*/, + 0 /*partOffs*/)); + // Create SDNode for the swifterror virtual register. + OutVals.push_back(DAG.getRegister(FuncInfo.SwiftErrorMap[FuncInfo.MBB][0], + EVT(TLI.getPointerTy(DL)))); + } + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); @@ -1906,6 +1999,27 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, DAG.setRoot(BrCond); } +/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global +/// variable if there exists one. +static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, + SDValue &Chain) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + MachineFunction &MF = DAG.getMachineFunction(); + Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent()); + MachineSDNode *Node = + DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain); + if (Global) { + MachinePointerInfo MPInfo(Global); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); + auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8, + DAG.getEVTAlignment(PtrTy)); + Node->setMemRefs(MemRefs, MemRefs + 1); + } + return SDValue(Node, 0); +} + /// Codegen a new tail for a stack protector check ParentMBB which has had its /// tail spliced into a stack protector check success bb. /// @@ -1922,32 +2036,59 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI->getStackProtectorIndex(); - const Value *IRGuard = SPD.getGuard(); - SDValue GuardPtr = getValue(IRGuard); + SDValue Guard; + SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); + const Module &M = *ParentBB->getParent()->getFunction()->getParent(); + unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); - unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType()); + // Generate code to load the content of the guard slot. + SDValue StackSlot = DAG.getLoad( + PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, + MachineMemOperand::MOVolatile); + + // Retrieve guard check function, nullptr if instrumentation is inlined. 
+ if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) { + // The target provides a guard check function to validate the guard value. + // Generate a call to that function with the content of the guard slot as + // argument. + auto *Fn = cast<Function>(GuardCheck); + FunctionType *FnTy = Fn->getFunctionType(); + assert(FnTy->getNumParams() == 1 && "Invalid function signature"); - SDValue Guard; - SDLoc dl = getCurSDLoc(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = StackSlot; + Entry.Ty = FnTy->getParamType(0); + if (Fn->hasAttribute(1, Attribute::AttrKind::InReg)) + Entry.isInReg = true; + Args.push_back(Entry); - // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the - // guard value from the virtual register holding the value. Otherwise, emit a - // volatile load to retrieve the stack guard value. - unsigned GuardReg = SPD.getGuardReg(); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(DAG.getEntryNode()) + .setCallee(Fn->getCallingConv(), FnTy->getReturnType(), + getValue(GuardCheck), std::move(Args)); - if (GuardReg && TLI.useLoadStackGuardNode()) - Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg, - PtrTy); - else - Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), - GuardPtr, MachinePointerInfo(IRGuard, 0), - true, false, false, Align); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + DAG.setRoot(Result.second); + return; + } - SDValue StackSlot = DAG.getLoad( - PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true, - false, false, Align); + // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD. + // Otherwise, emit a volatile load to retrieve the stack guard value. + SDValue Chain = DAG.getEntryNode(); + if (TLI.useLoadStackGuardNode()) { + Guard = getLoadStackGuard(DAG, dl, Chain); + } else { + const Value *IRGuard = TLI.getSDagStackGuard(M); + SDValue GuardPtr = getValue(IRGuard); + + Guard = + DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0), + Align, MachineMemOperand::MOVolatile); + } // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); @@ -2115,6 +2256,12 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; const BasicBlock *EHPadBB = I.getSuccessor(1); + // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't + // have to do anything here to lower funclet bundles. + assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + "Cannot lower invokes with arbitrary operand bundles yet!"); + const Value *Callee(I.getCalledValue()); const Function *Fn = dyn_cast<Function>(Callee); if (isa<InlineAsm>(Callee)) @@ -2134,8 +2281,15 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); break; } - } else + } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) { + // Currently we do not lower any intrinsic calls with deopt operand bundles. + // Eventually we will support lowering the @llvm.experimental.deoptimize + // intrinsic, and right now there are no plans to support other intrinsics + // with deopt state. 
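The rewritten visitSPDescriptorParent emits one of two shapes: when the target registers a guard check function (getSSPStackGuardCheck), the freshly loaded guard slot is passed to that function; otherwise the slot is compared against the guard value, obtained via LOAD_STACK_GUARD or a volatile load, and a mismatch branches to the failure block. In C terms, the no-check-function path behaves like this sketch:

#include <cstdlib>

// What the emitted check boils down to: reload the guard slot, compare
// it with the live guard value, and trap on mismatch. When the target
// registers a guard check function, the comparison is replaced by a
// call guard_check(Slot) instead (name hypothetical).
void epilogueCheck(unsigned long Slot, unsigned long Guard) {
  if (Slot != Guard)
    std::abort();  // stands in for the __stack_chk_fail path
}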
+ LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB); + } else { LowerCallTo(&I, getValue(Callee), false, EHPadBB); + } // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. @@ -2309,6 +2463,129 @@ void SelectionDAGBuilder::visitFSub(const User &I) { visitBinary(I, ISD::FSUB); } +/// Checks if the given instruction performs a vector reduction, in which case +/// we have the freedom to alter the elements in the result as long as the +/// reduction of them stays unchanged. +static bool isVectorReductionOp(const User *I) { + const Instruction *Inst = dyn_cast<Instruction>(I); + if (!Inst || !Inst->getType()->isVectorTy()) + return false; + + auto OpCode = Inst->getOpcode(); + switch (OpCode) { + case Instruction::Add: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + break; + case Instruction::FAdd: + case Instruction::FMul: + if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) + if (FPOp->getFastMathFlags().unsafeAlgebra()) + break; + // Fall through. + default: + return false; + } + + unsigned ElemNum = Inst->getType()->getVectorNumElements(); + unsigned ElemNumToReduce = ElemNum; + + // Do DFS search on the def-use chain from the given instruction. We only + // allow four kinds of operations during the search until we reach the + // instruction that extracts the first element from the vector: + // + // 1. The reduction operation of the same opcode as the given instruction. + // + // 2. PHI node. + // + // 3. ShuffleVector instruction together with a reduction operation that + // does a partial reduction. + // + // 4. ExtractElement that extracts the first element from the vector, and we + // stop searching the def-use chain here. + // + // 3 & 4 above perform a reduction on all elements of the vector. We push defs + // from 1-3 to the stack to continue the DFS. The given instruction is not + // a reduction operation if we meet any instructions other than those + // listed above. + + SmallVector<const User *, 16> UsersToVisit{Inst}; + SmallPtrSet<const User *, 16> Visited; + bool ReduxExtracted = false; + + while (!UsersToVisit.empty()) { + auto User = UsersToVisit.back(); + UsersToVisit.pop_back(); + if (!Visited.insert(User).second) + continue; + + for (const auto &U : User->users()) { + auto Inst = dyn_cast<Instruction>(U); + if (!Inst) + return false; + + if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) { + if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) + if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra()) + return false; + UsersToVisit.push_back(U); + } else if (const ShuffleVectorInst *ShufInst = + dyn_cast<ShuffleVectorInst>(U)) { + // Detect the following pattern: A ShuffleVector instruction together + // with a reduction that does a partial reduction on the first and second + // ElemNumToReduce / 2 elements, and store the result in + // ElemNumToReduce / 2 elements in another vector.
+ + unsigned ResultElements = ShufInst->getType()->getVectorNumElements(); + if (ResultElements < ElemNum) + return false; + + if (ElemNumToReduce == 1) + return false; + if (!isa<UndefValue>(U->getOperand(1))) + return false; + for (unsigned i = 0; i < ElemNumToReduce / 2; ++i) + if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2)) + return false; + for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i) + if (ShufInst->getMaskValue(i) != -1) + return false; + + // There is only one user of this ShuffleVector instruction, which + // must be a reduction operation. + if (!U->hasOneUse()) + return false; + + auto U2 = dyn_cast<Instruction>(*U->user_begin()); + if (!U2 || U2->getOpcode() != OpCode) + return false; + + // Check operands of the reduction operation. + if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) || + (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) { + UsersToVisit.push_back(U2); + ElemNumToReduce /= 2; + } else + return false; + } else if (isa<ExtractElementInst>(U)) { + // At this moment we should have reduced all elements in the vector. + if (ElemNumToReduce != 1) + return false; + + const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1)); + if (!Val || Val->getZExtValue() != 0) + return false; + + ReduxExtracted = true; + } else + return false; + } + } + return ReduxExtracted; +} + void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2316,6 +2593,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { bool nuw = false; bool nsw = false; bool exact = false; + bool vec_redux = false; FastMathFlags FMF; if (const OverflowingBinaryOperator *OFBinOp = @@ -2329,10 +2607,16 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I)) FMF = FPOp->getFastMathFlags(); + if (isVectorReductionOp(&I)) { + vec_redux = true; + DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); + } + SDNodeFlags Flags; Flags.setExact(exact); Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); + Flags.setVectorReduction(vec_redux); if (EnableFMFInDAG) { Flags.setAllowReciprocal(FMF.allowReciprocal()); Flags.setNoInfs(FMF.noInfs()); @@ -2433,7 +2717,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); - + // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. // FIXME: We should propagate the fast-math-flags to the DAG node itself for // further optimization, but currently FMF is only applicable to binary nodes. @@ -2444,6 +2728,14 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } +// Check if the condition of the select has one use or two users that are both +// selects with the same condition. 
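What isVectorReductionOp is matching is the classic log2 shuffle reduction: each round a shufflevector moves the upper half of the live lanes down, the binary op combines the two halves, ElemNumToReduce halves, and finally lane 0 is extracted. The same shape written out on a scalar array:

#include <cassert>

// The halving pattern isVectorReductionOp recognizes, on a plain array:
// each round adds the upper half onto the lower half (shufflevector +
// add in IR), then element 0 is extracted.
int reduceAdd(int V[], int N) {     // N must be a power of two
  for (int Half = N / 2; Half >= 1; Half /= 2)
    for (int i = 0; i < Half; ++i)
      V[i] += V[i + Half];          // lane i combines with lane i+Half
  return V[0];                      // the final extractelement
}

int main() {
  int V[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  assert(reduceAdd(V, 8) == 36);
}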
+static bool hasOnlySelectUsers(const Value *Cond) { + return std::all_of(Cond->user_begin(), Cond->user_end(), [](const Value *V) { + return isa<SelectInst>(V); + }); +} + void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), @@ -2529,7 +2821,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // If the underlying comparison instruction is used by any other // instruction, the consumed instructions won't be destroyed, so it is // not profitable to convert to a min/max. - cast<SelectInst>(&I)->getCondition()->hasOneUse()) { + hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) { OpCode = Opc; LHSVal = getValue(LHS); RHSVal = getValue(RHS); @@ -2703,17 +2995,6 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { InVec, InIdx)); } -// Utility for visitShuffleVector - Return true if every element in Mask, -// beginning from position Pos and ending in Pos+Size, falls within the -// specified sequential range [L, L+Pos). or is undef. -static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, - unsigned Pos, unsigned Size, int Low) { - for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) - if (Mask[i] >= 0 && Mask[i] != Low) - return false; - return true; -} - void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); @@ -2728,8 +3009,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { - setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, - &Mask[0])); + setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, Mask)); return; } @@ -2738,29 +3018,46 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Mask is longer than the source vectors and is a multiple of the source // vectors. We can use concatenate vector to make the mask and vectors // lengths match. - if (SrcNumElts*2 == MaskNumElts) { - // First check for Src1 in low and Src2 in high - if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && - isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), - VT, Src1, Src2)); - return; + + unsigned NumConcat = MaskNumElts / SrcNumElts; + + // Check if the shuffle is some kind of concatenation of the input vectors. + bool IsConcat = true; + SmallVector<int, 8> ConcatSrcs(NumConcat, -1); + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < 0) + continue; + // Ensure the indices in each SrcVT sized piece are sequential and that + // the same source is used for the whole piece. + if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { + IsConcat = false; + break; } - // Then check for Src2 in low and Src1 in high - if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && - isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), - VT, Src2, Src1)); - return; + // Remember which source this index came from. + ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; + } + + // The shuffle is concatenating multiple vectors together. Just emit + // a CONCAT_VECTORS operation. 
+ if (IsConcat) { + SmallVector<SDValue, 8> ConcatOps; + for (auto Src : ConcatSrcs) { + if (Src < 0) + ConcatOps.push_back(DAG.getUNDEF(SrcVT)); + else if (Src == 0) + ConcatOps.push_back(Src1); + else + ConcatOps.push_back(Src2); } + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), + VT, ConcatOps)); + return; } // Pad both vectors with undefs to make them the same length as the mask. - unsigned NumConcat = MaskNumElts / SrcNumElts; - bool Src1U = Src1.getOpcode() == ISD::UNDEF; - bool Src2U = Src2.getOpcode() == ISD::UNDEF; SDValue UndefVal = DAG.getUNDEF(SrcVT); SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); @@ -2768,10 +3065,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MOps1[0] = Src1; MOps2[0] = Src2; - Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, MOps1); - Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, MOps2); + Src1 = Src1.isUndef() ? DAG.getUNDEF(VT) + : DAG.getNode(ISD::CONCAT_VECTORS, + getCurSDLoc(), VT, MOps1); + Src2 = Src2.isUndef() ? DAG.getUNDEF(VT) + : DAG.getNode(ISD::CONCAT_VECTORS, + getCurSDLoc(), VT, MOps2); // Readjust mask for new input vector length. SmallVector<int, 8> MappedOps; @@ -2783,7 +3082,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { } setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, - &MappedOps[0])); + MappedOps)); return; } @@ -2864,7 +3163,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { } setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, - &MappedOps[0])); + MappedOps)); return; } } @@ -2982,8 +3281,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Value *Op0 = I.getOperand(0); // Note that the pointer operand may be a vector of pointers. Take the scalar // element which holds a pointer. 
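The IsConcat scan above generalizes the removed two-vector special case: a mask is a concatenation whenever each SrcNumElts-wide slice of it indexes sequentially into a single source, with undef lanes acting as wildcards. A standalone version of that test over a plain mask:

#include <vector>

// Mirror of the ConcatSrcs computation: returns the per-slice source
// (-1 = all-undef slice) if Mask concatenates SrcNumElts-wide pieces of
// the inputs, or an empty vector if it is not a concatenation.
std::vector<int> concatSources(const std::vector<int> &Mask, int SrcNumElts) {
  std::vector<int> Srcs(Mask.size() / SrcNumElts, -1);
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    int Idx = Mask[i];
    if (Idx < 0)
      continue;                               // undef lane matches anything
    if (Idx % SrcNumElts != i % SrcNumElts)   // lanes must stay sequential
      return {};
    int &Src = Srcs[i / SrcNumElts];
    if (Src >= 0 && Src != Idx / SrcNumElts)  // one source per slice
      return {};
    Src = Idx / SrcNumElts;
  }
  // e.g. Mask = {0,1,2,3} over 2-wide sources yields {0,1}, i.e.
  // CONCAT_VECTORS(Src1, Src2).
  return Srcs;
}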
- Type *Ty = Op0->getType()->getScalarType(); - unsigned AS = Ty->getPointerAddressSpace(); + unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace(); SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); @@ -2993,14 +3291,15 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { cast<VectorType>(I.getType())->getVectorNumElements() : 0; if (VectorWidth && !N.getValueType().isVector()) { - MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth); + LLVMContext &Context = *DAG.getContext(); + EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth); SmallVector<SDValue, 16> Ops(VectorWidth, N); N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } - for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); - OI != E; ++OI) { - const Value *Idx = *OI; - if (StructType *StTy = dyn_cast<StructType>(Ty)) { + for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); + GTI != E; ++GTI) { + const Value *Idx = GTI.getOperand(); + if (StructType *StTy = dyn_cast<StructType>(*GTI)) { unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset @@ -3015,14 +3314,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, DAG.getConstant(Offset, dl, N.getValueType()), &Flags); } - - Ty = StTy->getElementType(Field); } else { - Ty = cast<SequentialType>(Ty)->getElementType(); MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); unsigned PtrSize = PtrTy.getSizeInBits(); - APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); + APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType())); // If this is a scalar constant or a splat vector of constants, // handle it quickly. @@ -3055,7 +3351,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (!IdxN.getValueType().isVector() && VectorWidth) { MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); SmallVector<SDValue, 16> Ops(VectorWidth, IdxN); - IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } // If the index is smaller or larger than intptr_t, truncate or extend // it. @@ -3144,7 +3440,22 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); + if (TLI.supportSwiftError()) { + // Swifterror values can come from either a function parameter with + // swifterror attribute or an alloca with swifterror attribute. 
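The gep_type_iterator loop above accumulates the address in two ways: a struct field contributes a fixed byte offset, and an array or vector step contributes Idx * ElementSize. For a hypothetical struct S { int a; double b[4]; }, the address of s->b[i] lowers to exactly this arithmetic (offsets per a typical 64-bit ABI):

#include <cassert>
#include <cstddef>
#include <cstdint>

struct S { int a; double b[4]; };

// The computation visitGetElementPtr emits for &s->b[i]:
// base + offsetof(S, b) + i * sizeof(double).
uintptr_t gepAddr(const S *Base, uint64_t I) {
  uintptr_t N = (uintptr_t)Base;
  N += offsetof(S, b);       // struct field: constant byte offset
  N += I * sizeof(double);   // array index: Idx * ElementSize
  return N;
}

int main() {
  S s{};
  assert(gepAddr(&s, 2) == (uintptr_t)&s.b[2]);
}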
+ if (const Argument *Arg = dyn_cast<Argument>(SV)) { + if (Arg->hasSwiftErrorAttr()) + return visitLoadFromSwiftError(I); + } + + if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { + if (Alloca->isSwiftError()) + return visitLoadFromSwiftError(I); + } + } + SDValue Ptr = getValue(SV); Type *Ty = I.getType(); @@ -3168,7 +3479,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets); @@ -3223,10 +3533,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); - SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, - A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, isInvariant, Alignment, AAInfo, - Ranges); + auto MMOFlags = MachineMemOperand::MONone; + if (isVolatile) + MMOFlags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + MMOFlags |= MachineMemOperand::MONonTemporal; + if (isInvariant) + MMOFlags |= MachineMemOperand::MOInvariant; + + SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, + MachinePointerInfo(SV, Offsets[i]), Alignment, + MMOFlags, AAInfo, Ranges); Values[i] = L; Chains[ChainI] = L.getValue(1); @@ -3245,6 +3562,64 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getVTList(ValueVTs), Values)); } +void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + assert(TLI.supportSwiftError() && + "call visitStoreToSwiftError when backend supports swifterror"); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + const Value *SrcV = I.getOperand(0); + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), + SrcV->getType(), ValueVTs, &Offsets); + assert(ValueVTs.size() == 1 && Offsets[0] == 0 && + "expect a single EVT for swifterror"); + + SDValue Src = getValue(SrcV); + // Create a virtual register, then update the virtual register. + auto &DL = DAG.getDataLayout(); + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); + unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue + // Chain can be getRoot or getControlRoot. 
+ SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, + SDValue(Src.getNode(), Src.getResNo())); + DAG.setRoot(CopyNode); + FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); +} + +void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { + assert(DAG.getTargetLoweringInfo().supportSwiftError() && + "call visitLoadFromSwiftError when backend supports swifterror"); + + assert(!I.isVolatile() && + I.getMetadata(LLVMContext::MD_nontemporal) == nullptr && + I.getMetadata(LLVMContext::MD_invariant_load) == nullptr && + "Support volatile, non temporal, invariant for load_from_swift_error"); + + const Value *SV = I.getOperand(0); + Type *Ty = I.getType(); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + assert(!AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) && + "load_from_swift_error should not be constant memory"); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty, + ValueVTs, &Offsets); + assert(ValueVTs.size() == 1 && Offsets[0] == 0 && + "expect a single EVT for swifterror"); + + // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT + SDValue L = DAG.getCopyFromReg(getRoot(), getCurSDLoc(), + FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, SV), + ValueVTs[0]); + + setValue(&I, L); +} + void SelectionDAGBuilder::visitStore(const StoreInst &I) { if (I.isAtomic()) return visitAtomicStore(I); @@ -3252,6 +3627,21 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { const Value *SrcV = I.getOperand(0); const Value *PtrV = I.getOperand(1); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.supportSwiftError()) { + // Swifterror values can come from either a function parameter with + // swifterror attribute or an alloca with swifterror attribute. + if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { + if (Arg->hasSwiftErrorAttr()) + return visitStoreToSwiftError(I); + } + + if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { + if (Alloca->isSwiftError()) + return visitStoreToSwiftError(I); + } + } + SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), @@ -3268,15 +3658,18 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Root = getRoot(); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); + SDLoc dl = getCurSDLoc(); EVT PtrVT = Ptr.getValueType(); - bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; unsigned Alignment = I.getAlignment(); - SDLoc dl = getCurSDLoc(); - AAMDNodes AAInfo; I.getAAMetadata(AAInfo); + auto MMOFlags = MachineMemOperand::MONone; + if (I.isVolatile()) + MMOFlags |= MachineMemOperand::MOVolatile; + if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) + MMOFlags |= MachineMemOperand::MONonTemporal; + // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. 
SDNodeFlags Flags; @@ -3293,10 +3686,9 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); - SDValue St = DAG.getStore(Root, dl, - SDValue(Src.getNode(), Src.getResNo() + i), - Add, MachinePointerInfo(PtrV, Offsets[i]), - isVolatile, isNonTemporal, Alignment, AAInfo); + SDValue St = DAG.getStore( + Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, + MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo); Chains[ChainI] = St; } @@ -3447,13 +3839,10 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); - SDValue InChain = DAG.getRoot(); - if (AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), - AAInfo))) { - // Do not serialize (non-volatile) loads of constant memory with anything. - InChain = DAG.getEntryNode(); - } + // Do not serialize masked loads of constant memory with anything. + bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation( + PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo)); + SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -3463,8 +3852,10 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, ISD::NON_EXTLOAD); - SDValue OutChain = Load.getValue(1); - DAG.setRoot(OutChain); + if (AddToChain) { + SDValue OutChain = Load.getValue(1); + DAG.setRoot(OutChain); + } setValue(&I, Load); } @@ -3585,7 +3976,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), dl, + Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, TLI.getPointerTy(DAG.getDataLayout())); Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy(DAG.getDataLayout())); @@ -3724,7 +4115,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); - } + } else + Result = lowerRangeToAssertZExt(DAG, I, Result); setValue(&I, Result); } @@ -3736,8 +4128,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, /// Op = (Op & 0x007fffff) | 0x3f800000; /// /// where Op is the hexadecimal representation of floating point value. -static SDValue -GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { +static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x007fffff, dl, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, @@ -3750,9 +4141,8 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { /// (float)(int)(((Op & 0x7f800000) >> 23) - 127); /// /// where Op is the hexadecimal representation of floating point value. 
-static SDValue -GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, - SDLoc dl) { +static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, + const TargetLowering &TLI, const SDLoc &dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, dl, MVT::i32)); SDValue t1 = DAG.getNode( @@ -3764,13 +4154,13 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, } /// getF32Constant - Get 32-bit floating point constant. -static SDValue -getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { +static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt, + const SDLoc &dl) { return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, MVT::f32); } -static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, +static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, SelectionDAG &DAG) { // TODO: What fast-math-flags should be set on the floating-point nodes? @@ -3862,7 +4252,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -3885,9 +4275,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - + // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -3984,9 +4374,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - + // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4082,7 +4472,7 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { // TODO: What fast-math-flags should be set on the floating-point nodes? @@ -4173,7 +4563,7 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) @@ -4185,7 +4575,7 @@ static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. 
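GetSignificand and GetExponent operate on the raw IEEE-754 single-precision bits exactly as their doc comments state: mask the fraction and OR in 0x3f800000 to pin the value into [1,2), or shift the biased exponent field down and subtract 127. The same two operations on a host float:

#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t bitsOf(float F) {
  uint32_t B;
  std::memcpy(&B, &F, 4);
  return B;
}

// (Op & 0x007fffff) | 0x3f800000: the significand as a float in [1,2).
float significand(float Op) {
  uint32_t B = (bitsOf(Op) & 0x007fffffu) | 0x3f800000u;
  float R;
  std::memcpy(&R, &B, 4);
  return R;
}

// (float)(int)(((Op & 0x7f800000) >> 23) - 127): the unbiased exponent.
float exponent(float Op) {
  return (float)(int)(((bitsOf(Op) & 0x7f800000u) >> 23) - 127);
}

int main() {
  assert(significand(12.0f) == 1.5f);  // 12 = 1.5 * 2^3
  assert(exponent(12.0f) == 3.0f);
}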
-static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, +static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && @@ -4214,7 +4604,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, /// ExpandPowI - Expand a llvm.powi intrinsic. -static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, +static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { // If RHS is a constant, we can expand this out to a multiplication tree, // otherwise we end up lowering to a call to __powidf2 (for example). When @@ -4609,18 +4999,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); return nullptr; case Intrinsic::eh_dwarf_cfa: { - SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI.getPointerTy(DAG.getDataLayout())); - SDValue Offset = DAG.getNode(ISD::ADD, sdl, - CfaArg.getValueType(), - DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, - CfaArg.getValueType()), - CfaArg); - SDValue FA = DAG.getNode( - ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), - DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); - setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), - FA, Offset)); + setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, + TLI.getPointerTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); return nullptr; } case Intrinsic::eh_sjlj_callsite: { @@ -4798,7 +5179,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: - case Intrinsic::round: { + case Intrinsic::round: + case Intrinsic::canonicalize: { unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. @@ -4812,6 +5194,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } setValue(&I, DAG.getNode(Opcode, sdl, @@ -4819,18 +5202,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return nullptr; } - case Intrinsic::minnum: - setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, - getValue(I.getArgOperand(0)).getValueType(), + case Intrinsic::minnum: { + auto VT = getValue(I.getArgOperand(0)).getValueType(); + unsigned Opc = + I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT) + ? ISD::FMINNAN + : ISD::FMINNUM; + setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; - case Intrinsic::maxnum: - setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, - getValue(I.getArgOperand(0)).getValueType(), + } + case Intrinsic::maxnum: { + auto VT = getValue(I.getArgOperand(0)).getValueType(); + unsigned Opc = + I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT) + ? 
ISD::FMAXNAN + : ISD::FMAXNUM; + setValue(&I, DAG.getNode(Opc, sdl, VT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); return nullptr; + } case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -4954,47 +5347,35 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return nullptr; } + case Intrinsic::stackguard: { + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + MachineFunction &MF = DAG.getMachineFunction(); + const Module &M = *MF.getFunction()->getParent(); + SDValue Chain = getRoot(); + if (TLI.useLoadStackGuardNode()) { + Res = getLoadStackGuard(DAG, sdl, Chain); + } else { + const Value *Global = TLI.getSDagStackGuard(M); + unsigned Align = DL->getPrefTypeAlignment(Global->getType()); + Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), + MachinePointerInfo(Global, 0), Align, + MachineMemOperand::MOVolatile); + } + DAG.setRoot(Chain); + setValue(&I, Res); + return nullptr; + } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); - const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); - const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); - - // See if Ptr is a bitcast. If it is, look through it and see if we can get - // global variable __stack_chk_guard. - if (!GV) - if (const Operator *BC = dyn_cast<Operator>(Ptr)) - if (BC->getOpcode() == Instruction::BitCast) - GV = dyn_cast<GlobalVariable>(BC->getOperand(0)); - - if (GV && TLI.useLoadStackGuardNode()) { - // Emit a LOAD_STACK_GUARD node. - MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, - sdl, PtrTy, Chain); - MachinePointerInfo MPInfo(GV); - MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); - unsigned Flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOInvariant; - *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, - PtrTy.getSizeInBits() / 8, - DAG.getEVTAlignment(PtrTy)); - Node->setMemRefs(MemRefs, MemRefs + 1); - - // Copy the guard value to a virtual register so that it can be - // retrieved in the epilogue. - Src = SDValue(Node, 0); - const TargetRegisterClass *RC = - TLI.getRegClassFor(Src.getSimpleValueType()); - unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); - - SPDescriptor.setGuardReg(Reg); - Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src); - } else { + + if (TLI.useLoadStackGuardNode()) + Src = getLoadStackGuard(DAG, sdl, Chain); + else Src = getValue(I.getArgOperand(0)); // The guard's value. - } AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); @@ -5006,7 +5387,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Store the stack protector onto the stack. 
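// A minimal sketch of the protection scheme the stackguard/stackprotector
// lowering above implements (conceptual host code, not LLVM API): the
// prologue copies the guard next to the frame data, and the epilogue
// re-checks it before returning, so a linear stack smash is detected.
#include <cstdint>
#include <cstdlib>

static uintptr_t GuardValue = 0x5ee5beefu; // stand-in for the process guard

static int protectedFrame() {
  uintptr_t Slot = GuardValue; // prologue: Intrinsic::stackprotector's store
  char Buf[16] = {0};          // object a bug might overrun
  (void)Buf;
  if (Slot != GuardValue)      // epilogue: the SPDescriptor compare
    std::abort();              // FailureMBB: ~ __stack_chk_fail, no return
  return 0;
}

int main() { return protectedFrame(); }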
Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FI), - true, false, 0); + /* Alignment = */ 0, MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); return nullptr; @@ -5060,15 +5441,20 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return nullptr; } - case Intrinsic::gcroot: - if (GFI) { - const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); - const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); - - FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); - GFI->addStackRoot(FI->getIndex(), TypeMap); - } + case Intrinsic::gcroot: { + MachineFunction &MF = DAG.getMachineFunction(); + const Function *F = MF.getFunction(); + (void)F; + assert(F->hasGC() && + "only valid in functions with gc specified, enforced by Verifier"); + assert(GFI && "implied by previous"); + const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); + const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); + + FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); + GFI->addStackRoot(FI->getIndex(), TypeMap); return nullptr; + } case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); @@ -5101,7 +5487,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { CallingConv::C, I.getType(), DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); + std::move(Args)); std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); @@ -5193,18 +5579,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::invariant_end: // Discard region information. return nullptr; - case Intrinsic::stackprotectorcheck: { - // Do not actually emit anything for this basic block. Instead we initialize - // the stack protector descriptor and export the guard variable so we can - // access it in FinishBasicBlock. - const BasicBlock *BB = I.getParent(); - SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); - ExportFromCurrentBlock(SPDescriptor.getGuard()); - - // Flush our exports since we are going to process a terminator. 
- (void)getControlRoot(); - return nullptr; - } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); case Intrinsic::donothing: @@ -5220,11 +5594,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::experimental_gc_statepoint: { - visitStatepoint(I); + LowerStatepoint(ImmutableStatepoint(&I)); return nullptr; } case Intrinsic::experimental_gc_result: { - visitGCResult(I); + visitGCResult(cast<GCResultInst>(I)); return nullptr; } case Intrinsic::experimental_gc_relocate: { @@ -5303,6 +5677,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, N); return nullptr; } + + case Intrinsic::experimental_deoptimize: + LowerDeoptimizeCall(&I); + return nullptr; } } @@ -5378,14 +5756,16 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, const BasicBlock *EHPadBB) { - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - Type *RetTy = FTy->getReturnType(); + auto &DL = DAG.getDataLayout(); + FunctionType *FTy = CS.getFunctionType(); + Type *RetTy = CS.getType(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); + const Value *SwiftErrorVal = nullptr; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5399,6 +5779,17 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Skip the first return-type Attribute to get to params. Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + + // Use swifterror virtual register as input to the call. + if (Entry.isSwiftError && TLI.supportSwiftError()) { + SwiftErrorVal = V; + // We find the virtual register for the actual swifterror argument. + // Instead of using the Value, we use the virtual register instead. + Entry.Node = DAG.getRegister( + FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, V), + EVT(TLI.getPointerTy(DL))); + } + Args.push_back(Entry); // If we have an explicit sret argument that is an Instruction, (i.e., it @@ -5413,13 +5804,32 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, isTailCall = false; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(RetTy, FTy, Callee, std::move(Args), CS) - .setTailCall(isTailCall); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, std::move(Args), CS) + .setTailCall(isTailCall) + .setConvergent(CS.isConvergent()); std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); - if (Result.first.getNode()) - setValue(CS.getInstruction(), Result.first); + if (Result.first.getNode()) { + const Instruction *Inst = CS.getInstruction(); + Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first); + setValue(Inst, Result.first); + } + + // The last element of CLI.InVals has the SDValue for swifterror return. + // Here we copy it to a virtual register and update SwiftErrorMap for + // book-keeping. + if (SwiftErrorVal && TLI.supportSwiftError()) { + // Get the last element of InVals. 
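// A source-level sketch of what this copy achieves (hypothetical types, not
// LLVM API): the callee hands the swifterror value back as a trailing,
// register-carried result, and the caller snapshots it into a fresh virtual
// register so later blocks read the updated value.
struct Error { int Code; };
struct CallResult {                // models CLI.InVals
  int Value;                       // ordinary return value
  Error *SwiftError;               // trailing swifterror result (InVals.back())
};

static CallResult callee(Error *In) { return {42, In}; }

static int caller(Error *&ErrSlot) {
  CallResult R = callee(ErrSlot);  // the lowered call
  ErrSlot = R.SwiftError;          // ~ CopyToReg into VReg + setSwiftErrorVReg
  return R.Value;
}

int main() {
  Error E{0};
  Error *Slot = &E;
  return caller(Slot) == 42 ? 0 : 1;
}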
+ SDValue Src = CLI.InVals.back(); + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); + unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); + // We update the virtual register for the actual swifterror argument. + FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); + DAG.setRoot(CopyNode); + } } /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the @@ -5449,7 +5859,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, PointerType::getUnqual(LoadTy)); if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( - const_cast<Constant *>(LoadInput), *Builder.DL)) + const_cast<Constant *>(LoadInput), LoadTy, *Builder.DL)) return Builder.getValue(LoadCst); } @@ -5470,9 +5880,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), - false /*volatile*/, - false /*nontemporal*/, - false /*isinvariant*/, 1 /* align=1 */); + /* Alignment = */ 1); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -5516,7 +5924,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return true; } - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), getValue(Size), @@ -5613,7 +6021,7 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { !I.getType()->isPointerTy()) return false; - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Src), getValue(Char), getValue(Length), @@ -5641,7 +6049,7 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { !I.getType()->isPointerTy()) return false; - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), getValue(Arg0), getValue(Arg1), @@ -5670,7 +6078,7 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { !I.getType()->isIntegerTy()) return false; - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), @@ -5697,7 +6105,7 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) return false; - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), MachinePointerInfo(Arg0)); @@ -5724,7 +6132,7 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { !I.getType()->isIntegerTy()) return false; - const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = 
TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), getValue(Arg0), getValue(Arg1), @@ -5803,9 +6211,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } // Check for well-known libc/libm calls. If the function is internal, it - // can't be a library call. + // can't be a library call. Don't do the check if marked as nobuiltin for + // some reason. LibFunc::Func Func; - if (!F->hasLocalLinkage() && F->hasName() && + if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { @@ -5952,9 +6361,19 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { RenameFn, DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); - // Check if we can potentially perform a tail call. More detailed checking is - // done within LowerCallTo, after more information about the call is known. - LowerCallTo(&I, Callee, I.isTailCall()); + // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't + // have to do anything here to lower funclet bundles. + assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + "Cannot lower calls with arbitrary operand bundles!"); + + if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) + LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); + else + // Check if we can potentially perform a tail call. More detailed checking + // is done within LowerCallTo, after more information about the call is + // known. + LowerCallTo(&I, Callee, I.isTailCall()); } namespace { @@ -6036,9 +6455,8 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; /// /// OpInfo describes the operand. /// -static void GetRegistersForValue(SelectionDAG &DAG, - const TargetLowering &TLI, - SDLoc DL, +static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, + const SDLoc &DL, SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); @@ -6301,8 +6719,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); Chain = DAG.getStore( Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), - false, false, 0); + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI)); OpInfo.CallOperand = StackSlot; } @@ -6349,6 +6766,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + if (CS.isConvergent()) + ExtraInfo |= InlineAsm::Extra_IsConvergent; // Set the asm dialect. ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; @@ -6413,10 +6832,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "couldn't allocate output register for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError( + CS, "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -6469,10 +6887,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 
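// The flag word consulted above packs the operand kind into the low 3 bits
// and the register count into the rest of the low 16 bits, which is why the
// count is recovered as (OpFlag & 0xffff) >> 3. A minimal sketch of that
// encoding (helper names here are illustrative, not the InlineAsm API):
#include <cassert>

static unsigned makeFlagWord(unsigned Kind, unsigned NumRegs) {
  return Kind | (NumRegs << 3);                  // kind | count
}
static unsigned kindOf(unsigned Flag) { return Flag & 7; }
static unsigned numRegsOf(unsigned Flag) { return (Flag & 0xffff) >> 3; }

int main() {
  unsigned Flag = makeFlagWord(/*Kind=*/2, /*NumRegs=*/4);
  assert(kindOf(Flag) == 2);
  assert(numRegsOf(Flag) == 4); // "(OpFlag&0xffff)>>3 registers"
}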
if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" - " don't know how to handle tied " - "indirect register inputs"); + emitInlineAsmError(CS, "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); return; } @@ -6486,10 +6903,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "inline asm error: This value" - " type register class is not natively supported!"); + emitInlineAsmError( + CS, "inline asm error: This value" + " type register class is not natively supported!"); return; } } @@ -6527,10 +6943,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -6570,20 +6984,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // TODO: Support this. if (OpInfo.isIndirect) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "Don't know how to handle indirect register inputs yet " - "for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError( + CS, "Don't know how to handle indirect register inputs yet " + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { - LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), - "couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -6667,11 +7078,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Emit the non-flagged stores from the physregs. 
SmallVector<SDValue, 8> OutChains; for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { - SDValue Val = DAG.getStore(Chain, getCurSDLoc(), - StoresToEmit[i].first, + SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - MachinePointerInfo(StoresToEmit[i].second), - false, false, 0); + MachinePointerInfo(StoresToEmit[i].second)); OutChains.push_back(Val); } @@ -6681,6 +7090,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { DAG.setRoot(Chain); } +void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, + const Twine &Message) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), Message); + + // Make sure we leave the DAG in a valid state + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType()); + setValue(CS.getInstruction(), DAG.getUNDEF(VT)); +} + void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), MVT::Other, getRoot(), @@ -6715,16 +7135,49 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.getSrcValue(I.getArgOperand(1)))); } -/// \brief Lower an argument list according to the target calling convention. -/// -/// \return A tuple of <return-value, token-chain> +SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, + const Instruction &I, + SDValue Op) { + const MDNode *Range = I.getMetadata(LLVMContext::MD_range); + if (!Range) + return Op; + + Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue(); + if (!Lo->isNullValue()) + return Op; + + Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue(); + unsigned Bits = cast<ConstantInt>(Hi)->getValue().logBase2(); + + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits); + + SDLoc SL = getCurSDLoc(); + + SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), + Op, DAG.getValueType(SmallVT)); + unsigned NumVals = Op.getNode()->getNumValues(); + if (NumVals == 1) + return ZExt; + + SmallVector<SDValue, 4> Ops; + + Ops.push_back(ZExt); + for (unsigned I = 1; I != NumVals; ++I) + Ops.push_back(Op.getValue(I)); + + return DAG.getMergeValues(Ops, SL); +} + +/// \brief Populate a CallLoweringInfo (into \p CLI) based on the properties of +/// the call being lowered. /// /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. 
-std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands( - ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) { +void SelectionDAGBuilder::populateCallLoweringInfo( + TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS, + unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, + bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -6743,12 +7196,11 @@ std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands( Args.push_back(Entry); } - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) - .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - - return lowerInvokable(CLI, EHPadBB); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args)) + .setDiscardResult(CS->use_empty()) + .setIsPatchPoint(IsPatchPoint); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6769,7 +7221,7 @@ std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands( /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, - SDLoc DL, SmallVectorImpl<SDValue> &Ops, + const SDLoc &DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { SDValue OpVal = Builder.getValue(CS.getArgument(i)); @@ -6889,8 +7341,11 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); - std::pair<SDValue, SDValue> Result = lowerCallOperands( - CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); + + TargetLowering::CallLoweringInfo CLI(DAG); + populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, + true); + std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) @@ -7057,6 +7512,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.isNest = false; Entry.isByVal = false; Entry.isReturned = false; + Entry.isSwiftSelf = false; + Entry.isSwiftError = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); @@ -7085,10 +7542,23 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } } + // We push in swifterror return as the last element of CLI.Ins. + ArgListTy &Args = CLI.getArgs(); + if (supportSwiftError()) { + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + if (Args[i].isSwiftError) { + ISD::InputArg MyFlags; + MyFlags.VT = getPointerTy(DL); + MyFlags.ArgVT = EVT(getPointerTy(DL)); + MyFlags.Flags.setSwiftError(); + CLI.Ins.push_back(MyFlags); + } + } + } + // Handle all of the outgoing arguments. 
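// Each outgoing argument below is split into "legal parts" the target can
// carry in registers. A minimal sketch of the part-count computation,
// assuming integer types and a fixed register width (the real logic lives
// in ComputeValueVTs / getRegisterType / getNumRegisters):
#include <cassert>

static unsigned numPartsForInt(unsigned ValueBits, unsigned RegBits) {
  return (ValueBits + RegBits - 1) / RegBits; // round up to whole registers
}

int main() {
  assert(numPartsForInt(128, 64) == 2); // i128 -> two i64 parts
  assert(numPartsForInt(33, 32) == 2);  // i33  -> two i32 parts
  assert(numPartsForInt(8, 32) == 1);   // i8   -> promoted into one part
}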
CLI.Outs.clear(); CLI.OutVals.clear(); - ArgListTy &Args = CLI.getArgs(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); @@ -7114,6 +7584,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInReg(); if (Args[i].isSRet) Flags.setSRet(); + if (Args[i].isSwiftSelf) + Flags.setSwiftSelf(); + if (Args[i].isSwiftError) + Flags.setSwiftError(); if (Args[i].isByVal) Flags.setByVal(); if (Args[i].isInAlloca) { @@ -7202,6 +7676,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SmallVector<SDValue, 4> InVals; CLI.Chain = LowerCall(CLI, InVals); + // Update CLI.InVals to use outside of this function. + CLI.InVals = InVals; + // Verify that the target's LowerCall behaved as expected. assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && "LowerCall didn't return a valid chain!"); @@ -7219,12 +7696,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { return std::make_pair(SDValue(), SDValue()); } - DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { - assert(InVals[i].getNode() && - "LowerCall emitted a null value!"); - assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && - "LowerCall emitted a value with the wrong type!"); - }); +#ifndef NDEBUG + for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && "LowerCall emitted a null value!"); + assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && + "LowerCall emitted a value with the wrong type!"); + } +#endif SmallVector<SDValue, 4> ReturnValues; if (!CanLowerReturn) { @@ -7254,7 +7732,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), - false, false, false, 1); + /* Alignment = */ 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -7263,7 +7741,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } else { // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. 
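// Reassembly is the inverse of the split above: concatenate the parts, then
// (per RetSExt/RetZExt) record via an Assert node that the value is already
// sign- or zero-extended so later extensions can fold away. A host-code
// sketch for two 32-bit parts forming an i64 (little-endian part order
// assumed):
#include <cassert>
#include <cstdint>

static uint64_t joinParts(uint32_t Lo, uint32_t Hi) {
  return (uint64_t(Hi) << 32) | Lo; // low part first, then high part
}

int main() {
  assert(joinParts(0xdeadbeefu, 0x1u) == 0x1deadbeefull);
  // AssertZext i32 on the joined i64 would promise the top 32 bits are
  // zero, i.e. the Hi part was 0, so a later zero-extension is a no-op.
}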
- ISD::NodeType AssertOp = ISD::DELETED_NODE; + Optional<ISD::NodeType> AssertOp; if (CLI.RetSExt) AssertOp = ISD::AssertSext; else if (CLI.RetZExt) @@ -7295,8 +7773,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { - SDValue Res = LowerOperation(SDValue(N, 0), DAG); - if (Res.getNode()) + if (SDValue Res = LowerOperation(SDValue(N, 0), DAG)) Results.push_back(Res); } @@ -7394,6 +7871,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setInReg(); if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); + if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf)) + Flags.setSwiftSelf(); + if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) + Flags.setSwiftError(); if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) Flags.setByVal(); if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { @@ -7483,7 +7964,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - ISD::NodeType AssertOp = ISD::DELETED_NODE; + Optional<ISD::NodeType> AssertOp = None; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, AssertOp); @@ -7524,7 +8005,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); if (!I->use_empty()) { - ISD::NodeType AssertOp = ISD::DELETED_NODE; + Optional<ISD::NodeType> AssertOp; if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) AssertOp = ISD::AssertSext; else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) @@ -7559,6 +8040,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } + // Update SwiftErrorMap. + if (Res.getOpcode() == ISD::CopyFromReg && TLI->supportSwiftError() && + F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) { + unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + FuncInfo->SwiftErrorMap[FuncInfo->MBB][0] = Reg; + } + // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. 
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { @@ -7656,7 +8145,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) - FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + FuncInfo.PHINodesToUpdate.push_back( + std::make_pair(&*MBBI++, Reg + i)); Reg += NumRegisters; } } @@ -7708,7 +8198,8 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, unsigned First, - unsigned Last) { + unsigned Last, + unsigned Density) { assert(Last >= First); assert(TotalCases[Last] >= TotalCases[First]); @@ -7729,10 +8220,15 @@ bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, assert(NumCases < UINT64_MAX / 100); assert(Range >= NumCases); - return NumCases * 100 >= Range * MinJumpTableDensity; + return NumCases * 100 >= Range * Density; } -static inline bool areJTsAllowed(const TargetLowering &TLI) { +static inline bool areJTsAllowed(const TargetLowering &TLI, + const SwitchInst *SI) { + const Function *Fn = SI->getParent()->getParent(); + if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") + return false; + return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } @@ -7826,7 +8322,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, #endif const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI)) + if (!areJTsAllowed(TLI, SI)) return; const int64_t N = Clusters.size(); @@ -7843,7 +8339,11 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, TotalCases[i] += TotalCases[i - 1]; } - if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) { + unsigned MinDensity = JumpTableDensity; + if (DefaultMBB->getParent()->getFunction()->optForSize()) + MinDensity = OptsizeJumpTableDensity; + if (N >= MinJumpTableSize + && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) { // Cheap case: the whole range might be suitable for jump table. CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { @@ -7888,7 +8388,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. - if (isDense(Clusters, &TotalCases[0], i, j)) { + if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); bool IsTable = j - i + 1 >= MinJumpTableSize; unsigned Tables = IsTable + (j == N - 1 ? 
0 : NumTables[j + 1]); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8fb85ff..b9888ae 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,14 +18,14 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/Statepoint.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Statepoint.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" +#include <utility> #include <vector> namespace llvm { @@ -101,8 +101,8 @@ class SelectionDAGBuilder { unsigned SDNodeOrder; public: DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { } - DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) : - DI(di), dl(DL), SDNodeOrder(SDNO) { } + DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) + : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {} const DbgValueInst* getDI() { return DI; } DebugLoc getdl() { return dl; } unsigned getSDNodeOrder() { return SDNodeOrder; } @@ -260,8 +260,9 @@ private: }; struct JumpTableHeader { JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, - bool E = false): - First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {} + bool E = false) + : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H), + Emitted(E) {} APInt First; APInt Last; const Value *SValue; @@ -286,9 +287,9 @@ private: BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, BitTestInfo C, BranchProbability Pr) - : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)), - Prob(Pr) {} + : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), + RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), + Cases(std::move(C)), Prob(Pr) {} APInt First; APInt Range; const Value *SValue; @@ -303,12 +304,9 @@ private: BranchProbability DefaultProb; }; - /// Minimum jump table density, in percent. - enum { MinJumpTableDensity = 40 }; - /// Check whether a range of clusters is dense enough for a jump table. bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, - unsigned First, unsigned Last); + unsigned First, unsigned Last, unsigned MinDensity); /// Build a jump table cluster from Clusters[First..Last]. Returns false if it /// decides it's not a good idea. @@ -457,7 +455,14 @@ private: /// /// c. After we finish selecting the basic block, in FinishBasicBlock if /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is - /// initialized, we first find a splice point in the parent basic block + /// initialized, we produce the validation code with one of these + /// techniques: + /// 1) with a call to a guard check function + /// 2) with inlined instrumentation + /// + /// 1) We insert a call to the check function before the terminator. + /// + /// 2) We first find a splice point in the parent basic block /// before the terminator and then splice the terminator of said basic /// block into the success basic block. 
Then we code-gen a new tail for /// the parent basic block consisting of the two loads, the comparison, @@ -467,29 +472,31 @@ private: /// the same function, use the same failure basic block). class StackProtectorDescriptor { public: - StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr), - FailureMBB(nullptr), Guard(nullptr), - GuardReg(0) { } + StackProtectorDescriptor() + : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr) {} /// Returns true if all fields of the stack protector descriptor are /// initialized implying that we should/are ready to emit a stack protector. bool shouldEmitStackProtector() const { - return ParentMBB && SuccessMBB && FailureMBB && Guard; + return ParentMBB && SuccessMBB && FailureMBB; + } + + bool shouldEmitFunctionBasedCheckStackProtector() const { + return ParentMBB && !SuccessMBB && !FailureMBB; } /// Initialize the stack protector descriptor structure for a new basic /// block. - void initialize(const BasicBlock *BB, - MachineBasicBlock *MBB, - const CallInst &StackProtCheckCall) { + void initialize(const BasicBlock *BB, MachineBasicBlock *MBB, + bool FunctionBasedInstrumentation) { // Make sure we are not initialized yet. assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " "already initialized!"); ParentMBB = MBB; - SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true); - FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); - if (!Guard) - Guard = StackProtCheckCall.getArgOperand(0); + if (!FunctionBasedInstrumentation) { + SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true); + FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); + } } /// Reset state that changes when we handle different basic blocks. @@ -518,17 +525,11 @@ private: /// always the same. void resetPerFunctionState() { FailureMBB = nullptr; - Guard = nullptr; - GuardReg = 0; } MachineBasicBlock *getParentMBB() { return ParentMBB; } MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } MachineBasicBlock *getFailureMBB() { return FailureMBB; } - const Value *getGuard() { return Guard; } - - unsigned getGuardReg() const { return GuardReg; } - void setGuardReg(unsigned R) { GuardReg = R; } private: /// The basic block for which we are generating the stack protector. @@ -548,13 +549,6 @@ private: /// contain a call to __stack_chk_fail(). MachineBasicBlock *FailureMBB; - /// The guard variable which we will compare against the stored value in the - /// stack protector stack slot. - const Value *Guard; - - /// The virtual register holding the stack guard value. - unsigned GuardReg; - /// Add a successor machine basic block to ParentMBB. If the successor mbb /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic /// block will be created. Assign a large weight if IsLikely is true. @@ -708,28 +702,88 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, const BasicBlock *EHPadBB = nullptr); - std::pair<SDValue, SDValue> lowerCallOperands( - ImmutableCallSite CS, - unsigned ArgIdx, - unsigned NumArgs, - SDValue Callee, - Type *ReturnTy, - const BasicBlock *EHPadBB = nullptr, - bool IsPatchPoint = false); + // Lower range metadata from 0 to N to assert zext to an integer of nearest + // floor power of two. 
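// A minimal model of the width computation behind lowerRangeToAssertZExt:
// for !range metadata [0, Hi) it asserts that only floor(log2(Hi)) low bits
// may be set, which is exact when Hi is a power of two (the assumption in
// this sketch):
#include <cassert>

static unsigned assertedBits(unsigned long long Hi) {
  unsigned Bits = 0;
  while ((1ull << (Bits + 1)) <= Hi)
    ++Bits;                          // floor(log2(Hi)), ~ APInt::logBase2
  return Bits;
}

int main() {
  assert(assertedBits(256) == 8); // range [0, 256) -> AssertZext i8
  assert(assertedBits(2) == 1);   // range [0, 2)   -> AssertZext i1
}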
+ SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, + SDValue Op); + + void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI, + ImmutableCallSite CS, unsigned ArgIdx, + unsigned NumArgs, SDValue Callee, + Type *ReturnTy, bool IsPatchPoint); + + std::pair<SDValue, SDValue> + lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB = nullptr); /// UpdateSplitBlock - When an MBB was split during scheduling, update the /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); + /// Describes a gc.statepoint or a gc.statepoint like thing for the purposes + /// of lowering into a STATEPOINT node. + struct StatepointLoweringInfo { + /// Bases[i] is the base pointer for Ptrs[i]. Together they denote the set + /// of gc pointers this STATEPOINT has to relocate. + SmallVector<const Value *, 16> Bases; + SmallVector<const Value *, 16> Ptrs; + + /// The set of gc.relocate calls associated with this gc.statepoint. + SmallVector<const GCRelocateInst *, 16> GCRelocates; + + /// The full list of gc arguments to the gc.statepoint being lowered. + ArrayRef<const Use> GCArgs; + + /// The gc.statepoint instruction. + const Instruction *StatepointInstr = nullptr; + + /// The list of gc transition arguments present in the gc.statepoint being + /// lowered. + ArrayRef<const Use> GCTransitionArgs; + + /// The ID that the resulting STATEPOINT instruction has to report. + unsigned ID = -1; + + /// Information regarding the underlying call instruction. + TargetLowering::CallLoweringInfo CLI; + + /// The deoptimization state associated with this gc.statepoint call, if + /// any. + ArrayRef<const Use> DeoptState; + + /// Flags associated with the meta arguments being lowered. + uint64_t StatepointFlags = -1; + + /// The number of patchable bytes the call needs to get lowered into. + unsigned NumPatchBytes = -1; + + /// The exception handling unwind destination, in case this represents an + /// invoke of gc.statepoint. + const BasicBlock *EHPadBB = nullptr; + + explicit StatepointLoweringInfo(SelectionDAG &DAG) : CLI(DAG) {} + }; + + /// Lower \p SLI into a STATEPOINT instruction. + SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SLI); + // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. void LowerStatepoint(ImmutableStatepoint Statepoint, const BasicBlock *EHPadBB = nullptr); -private: - std::pair<SDValue, SDValue> - lowerInvokable(TargetLowering::CallLoweringInfo &CLI, - const BasicBlock *EHPadBB = nullptr); + void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee, + const BasicBlock *EHPadBB); + + void LowerDeoptimizeCall(const CallInst *CI); + void LowerDeoptimizingReturn(); + + void LowerCallSiteWithDeoptBundleImpl(ImmutableCallSite CS, SDValue Callee, + const BasicBlock *EHPadBB, + bool VarArgDisallowed, + bool ForceVoidReturnTy); + +private: // Terminator instructions. 
void visitRet(const ReturnInst &I); void visitBr(const BranchInst &I); @@ -840,6 +894,8 @@ private: bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); + void visitLoadFromSwiftError(const LoadInst &I); + void visitStoreToSwiftError(const StoreInst &I); void visitInlineAsm(ImmutableCallSite CS); const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); @@ -853,10 +909,9 @@ private: void visitPatchpoint(ImmutableCallSite CS, const BasicBlock *EHPadBB = nullptr); - // These three are implemented in StatepointLowering.cpp - void visitStatepoint(const CallInst &I); + // These two are implemented in StatepointLowering.cpp void visitGCRelocate(const GCRelocateInst &I); - void visitGCResult(const CallInst &I); + void visitGCResult(const GCResultInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); @@ -870,6 +925,8 @@ private: void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); + void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message); + /// EmitFuncArgumentDbgValue - If V is an function argument then create /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. @@ -937,8 +994,7 @@ struct RegsForValue { /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - SDLoc dl, - SDValue &Chain, SDValue *Flag, + const SDLoc &dl, SDValue &Chain, SDValue *Flag, const Value *V = nullptr) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the specified @@ -946,18 +1002,16 @@ struct RegsForValue { /// as the input and updates them for the output Chain/Flag. If the Flag /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used /// in printing better diagnostic messages on error. - void - getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, - SDValue *Flag, const Value *V = nullptr, - ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl, + SDValue &Chain, SDValue *Flag, const Value *V = nullptr, + ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index /// (if applicable), and includes the number of values added into it. 
- void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, SDLoc dl, - SelectionDAG &DAG, - std::vector<SDValue> &Ops) const; + void AddInlineAsmOperands(unsigned Kind, bool HasMatching, + unsigned MatchingIdx, const SDLoc &dl, + SelectionDAG &DAG, std::vector<SDValue> &Ops) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index a1c6c4c..93ac6d6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -101,10 +101,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; - case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; + case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EH_DWARF_CFA: return "EH_DWARF_CFA"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; @@ -202,6 +203,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; + case ISD::FCANONICALIZE: return "fcanonicalize"; case ISD::FPOW: return "fpow"; case ISD::SMIN: return "smin"; case ISD::SMAX: return "smax"; @@ -378,7 +380,7 @@ static Printable PrintNodeId(const SDNode &Node) { }); } -void SDNode::dump() const { dump(nullptr); } +LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); dbgs() << '\n'; @@ -590,7 +592,7 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { N->dump(G); } -void SelectionDAG::dump() const { +LLVM_DUMP_METHOD void SelectionDAG::dump() const { dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); @@ -630,7 +632,7 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G, } } -typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet; static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { if (!once.insert(N).second) // If we've been here before, return now. 
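// The "once.insert(N).second" guard above is the standard mark-and-test
// idiom: set insertion reports whether the element was new, so a repeat
// visit is detected in the same step that records the first one. A minimal
// standalone equivalent with the standard library:
#include <cassert>
#include <set>

int main() {
  std::set<const void *> Once;
  int X = 0;
  assert(Once.insert(&X).second);  // first visit: inserted, proceed
  assert(!Once.insert(&X).second); // second visit: already present, return
}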
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c075da4..1d61657 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/PostOrderIterator.h" @@ -21,10 +21,10 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -32,8 +32,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -59,6 +59,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> + using namespace llvm; #define DEBUG_TYPE "isel" @@ -317,7 +318,7 @@ namespace llvm { "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); } -} +} // end namespace llvm // EmitInstrWithCustomInserter - This method should be implemented by targets // that mark instructions with the 'usesCustomInserter' flag. These @@ -329,7 +330,7 @@ namespace llvm { // are modified, the method should insert pairs of <OldSucc, NewSucc> into the // DenseMap. MachineBasicBlock * -TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, +TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const { #ifndef NDEBUG dbgs() << "If a target marks an instruction with " @@ -339,9 +340,9 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, llvm_unreachable(nullptr); } -void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, +void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const { - assert(!MI->hasPostISelHook() && + assert(!MI.hasPostISelHook() && "If a target marks an instruction with 'hasPostISelHook', " "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); } @@ -376,6 +377,8 @@ SelectionDAGISel::~SelectionDAGISel() { void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); + AU.addRequired<StackProtector>(); + AU.addPreserved<StackProtector>(); AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -440,7 +443,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TM.resetTargetOptions(Fn); // Reset OptLevel to None for optnone functions. 
CodeGenOpt::Level NewOptLevel = OptLevel; - if (Fn.hasFnAttribute(Attribute::OptimizeNone)) + if (OptLevel != CodeGenOpt::None && skipFunction(Fn)) NewOptLevel = CodeGenOpt::None; OptLevelChanger OLC(*this, NewOptLevel); @@ -468,11 +471,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF->setHasInlineAsm(false); FuncInfo->SplitCSR = false; - SmallVector<MachineBasicBlock*, 4> Returns; // We split CSR if the target supports it for the given function // and the function has only return exits. - if (TLI->supportSplitCSR(MF)) { + if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) { FuncInfo->SplitCSR = true; // Collect all the return blocks. @@ -481,12 +483,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { continue; const TerminatorInst *Term = BB.getTerminator(); - if (isa<UnreachableInst>(Term)) + if (isa<UnreachableInst>(Term) || isa<ReturnInst>(Term)) continue; - if (isa<ReturnInst>(Term)) { - Returns.push_back(FuncInfo->MBBMap[&BB]); - continue; - } // Bail out if the exit block is not Return nor Unreachable. FuncInfo->SplitCSR = false; @@ -508,8 +506,21 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); // Insert copies in the entry block and the return blocks. - if (FuncInfo->SplitCSR) + if (FuncInfo->SplitCSR) { + SmallVector<MachineBasicBlock*, 4> Returns; + // Collect all the return blocks. + for (MachineBasicBlock &MBB : mf) { + if (!MBB.succ_empty()) + continue; + + MachineBasicBlock::iterator Term = MBB.getFirstTerminator(); + if (Term != MBB.end() && Term->isReturn()) { + Returns.push_back(&MBB); + continue; + } + } TLI->insertCopiesSplitCSR(EntryMBB, Returns); + } DenseMap<unsigned, unsigned> LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) @@ -669,7 +680,7 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, } void SelectionDAGISel::ComputeLiveOutVRegInfo() { - SmallPtrSet<SDNode*, 128> VisitedNodes; + SmallPtrSet<SDNode*, 16> VisitedNodes; SmallVector<SDNode*, 128> Worklist; Worklist.push_back(CurDAG->getRoot().getNode()); @@ -854,7 +865,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Scheduler->Run(CurDAG, FuncInfo->MBB); } - if (ViewSUnitDAGs && MatchFilterBB) Scheduler->viewGraph(); + if (ViewSUnitDAGs && MatchFilterBB) + Scheduler->viewGraph(); // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. @@ -937,23 +949,7 @@ void SelectionDAGISel::DoInstructionSelection() { if (Node->use_empty()) continue; - SDNode *ResNode = Select(Node); - - // FIXME: This is pretty gross. 'Select' should be changed to not return - // anything at all and this code should be nuked with a tactical strike. - - // If node should not be replaced, continue with the next one. - if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) - continue; - // Replace node. - if (ResNode) { - ReplaceUses(Node, ResNode); - } - - // If after the replacement this node is not used any more, - // remove this dead node. - if (Node->use_empty()) // Don't delete EntryToken, etc. - CurDAG->RemoveDeadNode(Node); + Select(Node); } CurDAG->setRoot(Dummy.getValue()); @@ -1147,7 +1143,125 @@ static void collectFailStats(const Instruction *I) { case Instruction::LandingPad: NumFastIselFailLandingPad++; return; } } -#endif +#endif // NDEBUG + +/// Set up SwiftErrorVals by going through the function. If the function has +/// swifterror argument, it will be the first entry. 
+static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, + FunctionLoweringInfo *FuncInfo) { + if (!TLI->supportSwiftError()) + return; + + FuncInfo->SwiftErrorVals.clear(); + FuncInfo->SwiftErrorMap.clear(); + FuncInfo->SwiftErrorWorklist.clear(); + + // Check if function has a swifterror argument. + for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end(); + AI != AE; ++AI) + if (AI->hasSwiftErrorAttr()) + FuncInfo->SwiftErrorVals.push_back(&*AI); + + for (const auto &LLVMBB : Fn) + for (const auto &Inst : LLVMBB) { + if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst)) + if (Alloca->isSwiftError()) + FuncInfo->SwiftErrorVals.push_back(Alloca); + } +} + +/// For each basic block, merge incoming swifterror values or simply propagate +/// them. The merged results will be saved in SwiftErrorMap. For predecessors +/// that are not yet visited, we create virtual registers to hold the swifterror +/// values and save them in SwiftErrorWorklist. +static void mergeIncomingSwiftErrors(FunctionLoweringInfo *FuncInfo, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const BasicBlock *LLVMBB, + SelectionDAGBuilder *SDB) { + if (!TLI->supportSwiftError()) + return; + + // We should only do this when we have swifterror parameter or swifterror + // alloc. + if (FuncInfo->SwiftErrorVals.empty()) + return; + + // At beginning of a basic block, insert PHI nodes or get the virtual + // register from the only predecessor, and update SwiftErrorMap; if one + // of the predecessors is not visited, update SwiftErrorWorklist. + // At end of a basic block, if a block is in SwiftErrorWorklist, insert copy + // to sync up the virtual register assignment. + + // Always create a virtual register for each swifterror value in entry block. + auto &DL = SDB->DAG.getDataLayout(); + const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + if (pred_begin(LLVMBB) == pred_end(LLVMBB)) { + for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { + unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); + // Assign Undef to Vreg. We construct MI directly to make sure it works + // with FastISel. + BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), VReg); + FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); + } + return; + } + + if (auto *UniquePred = LLVMBB->getUniquePredecessor()) { + auto *UniquePredMBB = FuncInfo->MBBMap[UniquePred]; + if (!FuncInfo->SwiftErrorMap.count(UniquePredMBB)) { + // Update SwiftErrorWorklist with a new virtual register. + for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { + unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); + FuncInfo->SwiftErrorWorklist[UniquePredMBB].push_back(VReg); + // Propagate the information from the single predecessor. + FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); + } + return; + } + // Propagate the information from the single predecessor. + FuncInfo->SwiftErrorMap[FuncInfo->MBB] = + FuncInfo->SwiftErrorMap[UniquePredMBB]; + return; + } + + // For the case of multiple predecessors, update SwiftErrorWorklist. + // Handle the case where we have two or more predecessors being the same. 
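// A source-level picture of the merge performed below (a simplified model,
// not the MachineIR the code really builds): at a join block, each
// swifterror value gets a fresh vreg defined by a PHI over the vregs the
// predecessors left it in.
#include <cassert>
#include <map>
#include <vector>

int main() {
  // Predecessor block id -> vreg holding the swifterror value at its end.
  std::map<int, unsigned> VRegAtEndOf = {{/*pred A*/ 1, 100},
                                         {/*pred B*/ 2, 101}};
  unsigned JoinVReg = 102; // result of "PHI %100, <A>, %101, <B>"
  std::vector<unsigned> Inputs;
  for (auto &KV : VRegAtEndOf)
    Inputs.push_back(KV.second); // one incoming vreg per predecessor
  assert(Inputs.size() == 2 && JoinVReg != Inputs[0]);
}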
+ for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); + PI != PE; ++PI) { + auto *PredMBB = FuncInfo->MBBMap[*PI]; + if (!FuncInfo->SwiftErrorMap.count(PredMBB) && + !FuncInfo->SwiftErrorWorklist.count(PredMBB)) { + for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { + unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); + // When we actually visit the basic block PredMBB, we will materialize + // the virtual register assignment in copySwiftErrorsToFinalVRegs. + FuncInfo->SwiftErrorWorklist[PredMBB].push_back(VReg); + } + } + } + + // For the case of multiple predecessors, create a virtual register for + // each swifterror value and generate Phi node. + for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) { + unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC); + FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg); + + MachineInstrBuilder SwiftErrorPHI = BuildMI(*FuncInfo->MBB, + FuncInfo->MBB->begin(), SDB->getCurDebugLoc(), + TII->get(TargetOpcode::PHI), VReg); + for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); + PI != PE; ++PI) { + auto *PredMBB = FuncInfo->MBBMap[*PI]; + unsigned SwiftErrorReg = FuncInfo->SwiftErrorMap.count(PredMBB) ? + FuncInfo->SwiftErrorMap[PredMBB][I] : + FuncInfo->SwiftErrorWorklist[PredMBB][I]; + SwiftErrorPHI.addReg(SwiftErrorReg) + .addMBB(PredMBB); + } + } +} void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. @@ -1155,6 +1269,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (TM.Options.EnableFastISel) FastIS = TLI->createFastISel(*FuncInfo, LibInfo); + setupSwiftErrorVals(Fn, TLI, FuncInfo); + // Iterate over all basic blocks in the function. ReversePostOrderTraversal<const Function*> RPOT(&Fn); for (ReversePostOrderTraversal<const Function*>::rpo_iterator @@ -1193,6 +1309,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (!FuncInfo->MBB) continue; // Some blocks like catchpads have no code or MBB. FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); + mergeIncomingSwiftErrors(FuncInfo, TLI, TII, LLVMBB, SDB); // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; @@ -1228,7 +1345,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // where they are, so we can be sure to emit subsequent instructions // after them. if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) - FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt)); + FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); else FastIS->setLastLocalValue(nullptr); } @@ -1345,6 +1462,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { LowerArguments(Fn); } } + if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) { + bool FunctionBasedInstrumentation = + TLI->getSSPStackGuardCheck(*Fn.getParent()); + SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB], + FunctionBasedInstrumentation); + } if (Begin != BI) ++NumDAGBlocks; @@ -1376,15 +1499,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { /// terminator instructions so we can satisfy ABI constraints. A partial /// terminator sequence is an improper subset of a terminator sequence (i.e. it /// may be the whole terminator sequence). 
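// A minimal model of the backward walk used to find the stack-protector
// split point, assuming a flat instruction list and a predicate like the
// one defined just below (host-code sketch, not MachineBasicBlock
// iterators):
#include <cassert>
#include <vector>

enum class Kind { Ordinary, Copy, ImplicitDef, DebugValue, Terminator };

static bool inTerminatorSequence(Kind K) {
  return K == Kind::Copy || K == Kind::ImplicitDef || K == Kind::DebugValue;
}

// Returns the index where the terminator sequence (the copies feeding the
// return registers, plus interleaved debug values) begins.
static size_t findSplitPoint(const std::vector<Kind> &BB, size_t FirstTerm) {
  size_t Split = FirstTerm;
  while (Split != 0 && inTerminatorSequence(BB[Split - 1]))
    --Split;
  return Split;
}

int main() {
  std::vector<Kind> BB = {Kind::Ordinary, Kind::Ordinary, Kind::Copy,
                          Kind::DebugValue, Kind::Copy, Kind::Terminator};
  assert(findSplitPoint(BB, 5) == 2); // split before the first feeding copy
}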
-static bool MIIsInTerminatorSequence(const MachineInstr *MI) {
+static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
  // If we do not have a copy or an implicit def, we return true if and only if
  // MI is a debug value.
-  if (!MI->isCopy() && !MI->isImplicitDef())
+  if (!MI.isCopy() && !MI.isImplicitDef())
    // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the
    // physical registers if there is debug info associated with the terminator
    // of our mbb. We want to include said debug info in our terminator
    // sequence, so we return true in that case.
-    return MI->isDebugValue();
+    return MI.isDebugValue();

  // We have left the terminator sequence if we are not doing one of the
  // following:
@@ -1394,18 +1517,18 @@ static bool MIIsInTerminatorSequence(const MachineInstr *MI) {
  // 3. Defining a register via an implicit def.

  // OPI should always be a register definition...
-  MachineInstr::const_mop_iterator OPI = MI->operands_begin();
+  MachineInstr::const_mop_iterator OPI = MI.operands_begin();
  if (!OPI->isReg() || !OPI->isDef())
    return false;

  // Defining any register via an implicit def is always ok.
-  if (MI->isImplicitDef())
+  if (MI.isImplicitDef())
    return true;

  // Grab the copy source...
  MachineInstr::const_mop_iterator OPI2 = OPI;
  ++OPI2;
-  assert(OPI2 != MI->operands_end()
+  assert(OPI2 != MI.operands_end()
         && "Should have a copy implying we should have 2 arguments.");

  // Make sure that the copy dest is not a vreg when the copy source is a
@@ -1432,7 +1555,7 @@ static bool MIIsInTerminatorSequence(const MachineInstr *MI) {
/// terminator, but additionally the copies that move the vregs into the
/// physical registers.
static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) {
+FindSplitPointForStackProtector(MachineBasicBlock *BB) {
  MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
  //
  if (SplitPoint == BB->begin())
@@ -1442,7 +1565,7 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) {
  MachineBasicBlock::iterator Previous = SplitPoint;
  --Previous;

-  while (MIIsInTerminatorSequence(Previous)) {
+  while (MIIsInTerminatorSequence(*Previous)) {
    SplitPoint = Previous;
    if (Previous == Start)
      break;
@@ -1454,7 +1577,6 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) {

void
SelectionDAGISel::FinishBasicBlock() {
-
  DEBUG(dbgs() << "Total amount of phi nodes to update: "
               << FuncInfo->PHINodesToUpdate.size() << "\n";
        for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
@@ -1474,7 +1596,23 @@ SelectionDAGISel::FinishBasicBlock() {
  }

  // Handle stack protector.
-  if (SDB->SPDescriptor.shouldEmitStackProtector()) {
+  if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+    // The target provides a guard check function. There is no need to
+    // generate error handling code or to split the current basic block.
+    MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+
+    // Add the load and check to the basic block.
+    FuncInfo->MBB = ParentMBB;
+    FuncInfo->InsertPt =
+        FindSplitPointForStackProtector(ParentMBB);
+    SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+    CurDAG->setRoot(SDB->getRoot());
+    SDB->clear();
+    CodeGenAndEmitDAG();
+
+    // Clear the Per-BB State.
+ SDB->SPDescriptor.resetPerBBState(); + } else if (SDB->SPDescriptor.shouldEmitStackProtector()) { MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB(); @@ -1485,7 +1623,7 @@ SelectionDAGISel::FinishBasicBlock() { // register allocation issues caused by us splitting the parent mbb. The // register allocator will clean up said virtual copies later on. MachineBasicBlock::iterator SplitPoint = - FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc()); + FindSplitPointForStackProtector(ParentMBB); // Splice the terminator of ParentMBB into SuccessMBB. SuccessMBB->splice(SuccessMBB->end(), ParentMBB, @@ -1502,7 +1640,7 @@ SelectionDAGISel::FinishBasicBlock() { // CodeGen Failure MBB if we have not codegened it yet. MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); - if (!FailureMBB->size()) { + if (FailureMBB->empty()) { FuncInfo->MBB = FailureMBB; FuncInfo->InsertPt = FailureMBB->end(); SDB->visitSPDescriptorFailure(SDB->SPDescriptor); @@ -1515,52 +1653,61 @@ SelectionDAGISel::FinishBasicBlock() { SDB->SPDescriptor.resetPerBBState(); } - for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { + // Lower each BitTestBlock. + for (auto &BTB : SDB->BitTestCases) { // Lower header first, if it wasn't already lowered - if (!SDB->BitTestCases[i].Emitted) { + if (!BTB.Emitted) { // Set the current basic block to the mbb we wish to insert the code into - FuncInfo->MBB = SDB->BitTestCases[i].Parent; + FuncInfo->MBB = BTB.Parent; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB); + SDB->visitBitTestHeader(BTB, FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); } - BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob; - for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { - UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb; + BranchProbability UnhandledProb = BTB.Prob; + for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { + UnhandledProb -= BTB.Cases[j].ExtraProb; // Set the current basic block to the mbb we wish to insert the code into - FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->MBB = BTB.Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code // If all cases cover a contiguous range, it is not necessary to jump to // the default block after the last bit test fails. This is because the // range check during bit test header creation has guaranteed that every - // case here doesn't go outside the range. + // case here doesn't go outside the range. In this case, there is no need + // to perform the last bit test, as it will always be true. Instead, make + // the second-to-last bit-test fall through to the target of the last bit + // test, and delete the last bit test. + MachineBasicBlock *NextMBB; - if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) - NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB; - else if (j + 1 != ej) - NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB; - else - NextMBB = SDB->BitTestCases[i].Default; + if (BTB.ContiguousRange && j + 2 == ej) { + // Second-to-last bit-test with contiguous range: fall through to the + // target of the final bit test. + NextMBB = BTB.Cases[j + 1].TargetBB; + } else if (j + 1 == ej) { + // For the last bit test, fall through to Default. + NextMBB = BTB.Default; + } else { + // Otherwise, fall through to the next bit test. 
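+        // Editorial worked example: with cases c0, c1, c2 over a contiguous
+        // range, c0 falls through to c1, c1 (j + 2 == ej) falls through
+        // directly to c2's target, and c2 itself is removed below via
+        // pop_back, because the header's range check already guarantees
+        // that one of the cases matches. Without a contiguous range the
+        // last case falls through to Default instead.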
+ NextMBB = BTB.Cases[j + 1].ThisBB; + } - SDB->visitBitTestCase(SDB->BitTestCases[i], - NextMBB, - UnhandledProb, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], + SDB->visitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); - if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + if (BTB.ContiguousRange && j + 2 == ej) { + // Since we're not going to use the final bit test, remove it. + BTB.Cases.pop_back(); break; + } } // Update PHI Nodes @@ -1571,16 +1718,18 @@ SelectionDAGISel::FinishBasicBlock() { assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // This is "default" BB. We have two jumps to it. From "header" BB and - // from last "case" BB. - if (PHIBB == SDB->BitTestCases[i].Default) - PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) - .addMBB(SDB->BitTestCases[i].Parent) - .addReg(FuncInfo->PHINodesToUpdate[pi].second) - .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB); + // from last "case" BB, unless the latter was skipped. + if (PHIBB == BTB.Default) { + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(BTB.Parent); + if (!BTB.ContiguousRange) { + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(BTB.Cases.back().ThisBB); + } + } // One of "cases" BB. - for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); + for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) { - MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; + MachineBasicBlock* cBB = BTB.Cases[j].ThisBB; if (cBB->isSuccessor(PHIBB)) PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB); } @@ -1685,7 +1834,6 @@ SelectionDAGISel::FinishBasicBlock() { SDB->SwitchCases.clear(); } - /// Create the scheduler. If a specific scheduler was specified /// via the SchedulerRegistry, use it, otherwise select the /// one preferred by the target. @@ -1764,8 +1912,8 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, /// SelectInlineAsmMemoryOperands - Calls to this are automatically generated /// by tblgen. Others should not call it. -void SelectionDAGISel:: -SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) { +void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, + const SDLoc &DL) { std::vector<SDValue> InOps; std::swap(InOps, Ops); @@ -1802,15 +1950,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) { // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; - if (SelectInlineAsmMemoryOperand(InOps[i+1], - InlineAsm::getMemoryConstraintID(Flags), - SelOps)) + unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags); + if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps)) report_fatal_error("Could not match memory address. Inline asm" " failure!"); // Add this to the output node. 
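      // (Editorial note: the flag word packs the operand kind, Kind_Mem
      // here, together with the number of selected operands; the
      // getFlagWordForMem call added below additionally records the memory
      // constraint ID so it can be recovered after selection.)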
unsigned NewFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID); Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; @@ -1956,7 +2104,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); } -SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { +void SelectionDAGISel::Select_INLINEASM(SDNode *N) { SDLoc DL(N); std::vector<SDValue> Ops(N->op_begin(), N->op_end()); @@ -1965,11 +2113,11 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { const EVT VTs[] = {MVT::Other, MVT::Glue}; SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); New->setNodeId(-1); - return New.getNode(); + ReplaceUses(N, New.getNode()); + CurDAG->RemoveDeadNode(N); } -SDNode -*SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { +void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); @@ -1979,11 +2127,11 @@ SDNode SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); - return New.getNode(); + ReplaceUses(Op, New.getNode()); + CurDAG->RemoveDeadNode(Op); } -SDNode -*SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { +void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); @@ -1993,13 +2141,12 @@ SDNode SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); - return New.getNode(); + ReplaceUses(Op, New.getNode()); + CurDAG->RemoveDeadNode(Op); } - - -SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { - return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); +void SelectionDAGISel::Select_UNDEF(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); } /// GetVBR - decode a vbr encoding whose top bit is set. @@ -2019,15 +2166,11 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { return Val; } - -/// UpdateChainsAndGlue - When a match is complete, this method updates uses of -/// interior glue and chain results to use the new glue and chain results. -void SelectionDAGISel:: -UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, - const SmallVectorImpl<SDNode*> &ChainNodesMatched, - SDValue InputGlue, - const SmallVectorImpl<SDNode*> &GlueResultNodesMatched, - bool isMorphNodeTo) { +/// When a match is complete, this method updates uses of interior chain results +/// to use the new results. +void SelectionDAGISel::UpdateChains( + SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode *> &ChainNodesMatched, bool isMorphNodeTo) { SmallVector<SDNode*, 4> NowDeadNodes; // Now that all the normal results are replaced, we replace the chain and @@ -2039,10 +2182,8 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, // Replace all the chain results with the final chain we ended up with. for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { SDNode *ChainNode = ChainNodesMatched[i]; - - // If this node was already deleted, don't look at it. 
- if (ChainNode->getOpcode() == ISD::DELETED_NODE) - continue; + assert(ChainNode->getOpcode() != ISD::DELETED_NODE && + "Deleted node left in chain"); // Don't replace the results of the root node if we're doing a // MorphNodeTo. @@ -2056,35 +2197,12 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); // If the node became dead and we haven't already seen it, delete it. - if (ChainNode->use_empty() && + if (ChainNode != NodeToMatch && ChainNode->use_empty() && !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) NowDeadNodes.push_back(ChainNode); } } - // If the result produces glue, update any glue results in the matched - // pattern with the glue result. - if (InputGlue.getNode()) { - // Handle any interior nodes explicitly marked. - for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { - SDNode *FRN = GlueResultNodesMatched[i]; - - // If this node was already deleted, don't look at it. - if (FRN->getOpcode() == ISD::DELETED_NODE) - continue; - - assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue && - "Doesn't have a glue result"); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), - InputGlue); - - // If the node became dead and we haven't already seen it, delete it. - if (FRN->use_empty() && - !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN)) - NowDeadNodes.push_back(FRN); - } - } - if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes); @@ -2108,8 +2226,9 @@ enum ChainResult { /// already selected nodes "below" us. static ChainResult WalkChainUsers(const SDNode *ChainedNode, - SmallVectorImpl<SDNode*> &ChainedNodesInPattern, - SmallVectorImpl<SDNode*> &InteriorChainedNodes) { + SmallVectorImpl<SDNode *> &ChainedNodesInPattern, + DenseMap<const SDNode *, ChainResult> &TokenFactorResult, + SmallVectorImpl<SDNode *> &InteriorChainedNodes) { ChainResult Result = CR_Simple; for (SDNode::use_iterator UI = ChainedNode->use_begin(), @@ -2190,7 +2309,15 @@ WalkChainUsers(const SDNode *ChainedNode, // as a new TokenFactor. // // To distinguish these two cases, do a recursive walk down the uses. - switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) { + auto MemoizeResult = TokenFactorResult.find(User); + bool Visited = MemoizeResult != TokenFactorResult.end(); + // Recursively walk chain users only if the result is not memoized. + if (!Visited) { + auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult, + InteriorChainedNodes); + MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first; + } + switch (MemoizeResult->second) { case CR_Simple: // If the uses of the TokenFactor are just already-selected nodes, ignore // it, it is "below" our pattern. @@ -2210,9 +2337,10 @@ WalkChainUsers(const SDNode *ChainedNode, // ultimate chain result of the generated code. We will also add its chain // inputs as inputs to the ultimate TokenFactor we create. Result = CR_LeadsToInteriorNode; - ChainedNodesInPattern.push_back(User); - InteriorChainedNodes.push_back(User); - continue; + if (!Visited) { + ChainedNodesInPattern.push_back(User); + InteriorChainedNodes.push_back(User); + } } return Result; @@ -2227,12 +2355,16 @@ WalkChainUsers(const SDNode *ChainedNode, static SDValue HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, SelectionDAG *CurDAG) { + // Used for memoization. Without it WalkChainUsers could take exponential + // time to run. 
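+  // (Editorial note: WalkChainUsers recurses through TokenFactor users, so
+  // a dense web of TokenFactor nodes can be reached along exponentially
+  // many distinct paths; caching each node's ChainResult in this map makes
+  // the walk roughly linear in the number of nodes and edges instead.)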
+ DenseMap<const SDNode *, ChainResult> TokenFactorResult; // Walk all of the chained nodes we've matched, recursively scanning down the // users of the chain result. This adds any TokenFactor nodes that are caught // in between chained nodes to the chained and interior nodes list. SmallVector<SDNode*, 3> InteriorChainedNodes; for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, + TokenFactorResult, InteriorChainedNodes) == CR_InducesCycle) return SDValue(); // Would induce a cycle. } @@ -2322,8 +2454,10 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // Otherwise, no replacement happened because the node already exists. Replace // Uses of the old node with the new one. - if (Res != Node) + if (Res != Node) { CurDAG->ReplaceAllUsesWith(Node, Res); + CurDAG->RemoveDeadNode(Node); + } return Res; } @@ -2534,7 +2668,6 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, } namespace { - struct MatchScope { /// FailIndex - If this match fails, this is the index to continue with. unsigned FailIndex; @@ -2552,7 +2685,7 @@ struct MatchScope { SDValue InputChain, InputGlue; /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. - bool HasChainNodesMatched, HasGlueResultNodesMatched; + bool HasChainNodesMatched; }; /// \\brief A DAG update listener to keep the matching state @@ -2591,11 +2724,11 @@ public: J.setNode(E); } }; -} +} // end anonymous namespace -SDNode *SelectionDAGISel:: -SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, - unsigned TableSize) { +void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, + const unsigned char *MatcherTable, + unsigned TableSize) { // FIXME: Should these even be selected? Handle these cases in the caller? switch (NodeToMatch->getOpcode()) { default: @@ -2623,16 +2756,25 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::LIFETIME_START: case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. - return nullptr; + return; case ISD::AssertSext: case ISD::AssertZext: CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); - return nullptr; - case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); - case ISD::READ_REGISTER: return Select_READ_REGISTER(NodeToMatch); - case ISD::WRITE_REGISTER: return Select_WRITE_REGISTER(NodeToMatch); - case ISD::UNDEF: return Select_UNDEF(NodeToMatch); + CurDAG->RemoveDeadNode(NodeToMatch); + return; + case ISD::INLINEASM: + Select_INLINEASM(NodeToMatch); + return; + case ISD::READ_REGISTER: + Select_READ_REGISTER(NodeToMatch); + return; + case ISD::WRITE_REGISTER: + Select_WRITE_REGISTER(NodeToMatch); + return; + case ISD::UNDEF: + Select_UNDEF(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); @@ -2665,7 +2807,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // which ones they are. The result is captured into this list so that we can // update the chain results when the pattern is complete. 
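  // (Editorial note: when a match completes, UpdateChains walks this list
  // and replaces each matched node's chain result with the final input
  // chain, deleting any node that becomes dead in the process.)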
SmallVector<SDNode*, 3> ChainNodesMatched; - SmallVector<SDNode*, 3> GlueResultNodesMatched; DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; NodeToMatch->dump(CurDAG); @@ -2771,7 +2912,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, NewEntry.InputChain = InputChain; NewEntry.InputGlue = InputGlue; NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); - NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty(); MatchScopes.push_back(NewEntry); continue; } @@ -2816,6 +2956,18 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; } + case OPC_MoveChild0: case OPC_MoveChild1: + case OPC_MoveChild2: case OPC_MoveChild3: + case OPC_MoveChild4: case OPC_MoveChild5: + case OPC_MoveChild6: case OPC_MoveChild7: { + unsigned ChildNo = Opcode-OPC_MoveChild0; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + N = N.getOperand(ChildNo); + NodeStack.push_back(N); + continue; + } + case OPC_MoveParent: // Pop the current node off the NodeStack. NodeStack.pop_back(); @@ -3028,12 +3180,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Imm->getOpcode() == ISD::Constant) { const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); - Imm = CurDAG->getConstant(*Val, SDLoc(NodeToMatch), Imm.getValueType(), - true); + Imm = CurDAG->getTargetConstant(*Val, SDLoc(NodeToMatch), + Imm.getValueType()); } else if (Imm->getOpcode() == ISD::ConstantFP) { const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); - Imm = CurDAG->getConstantFP(*Val, SDLoc(NodeToMatch), - Imm.getValueType(), true); + Imm = CurDAG->getTargetConstantFP(*Val, SDLoc(NodeToMatch), + Imm.getValueType()); } RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); @@ -3041,7 +3193,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 - case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 + case OPC_EmitMergeInputChains1_1: // OPC_EmitMergeInputChains, 1, 1 + case OPC_EmitMergeInputChains1_2: { // OPC_EmitMergeInputChains, 1, 2 // These are space-optimized forms of OPC_EmitMergeInputChains. assert(!InputChain.getNode() && "EmitMergeInputChains should be the first chain producing node"); @@ -3049,7 +3202,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, "Should only have one EmitMergeInputChains per match"); // Read all of the chained nodes. - unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; + unsigned RecNo = Opcode - OPC_EmitMergeInputChains1_0; assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); @@ -3137,13 +3290,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; } - case OPC_EmitNode: - case OPC_MorphNodeTo: { + case OPC_EmitNode: case OPC_MorphNodeTo: + case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2: + case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: { uint16_t TargetOpc = MatcherTable[MatcherIndex++]; TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; // Get the result VT list. - unsigned NumVTs = MatcherTable[MatcherIndex++]; + unsigned NumVTs; + // If this is one of the compressed forms, get the number of VTs based + // on the Opcode. 
Otherwise read the next byte from the table. + if (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2) + NumVTs = Opcode - OPC_MorphNodeTo0; + else if (Opcode >= OPC_EmitNode0 && Opcode <= OPC_EmitNode2) + NumVTs = Opcode - OPC_EmitNode0; + else + NumVTs = MatcherTable[MatcherIndex++]; SmallVector<EVT, 4> VTs; for (unsigned i = 0; i != NumVTs; ++i) { MVT::SimpleValueType VT = @@ -3205,7 +3367,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Create the node. SDNode *Res = nullptr; - if (Opcode != OPC_MorphNodeTo) { + bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || + (Opcode >= OPC_MorphNodeTo0 && Opcode <= OPC_MorphNodeTo2); + if (!IsMorphNodeTo) { // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch), @@ -3218,13 +3382,17 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, nullptr)); } - } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) { - Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); } else { - // NodeToMatch was eliminated by CSE when the target changed the DAG. - // We will visit the equivalent node later. - DEBUG(dbgs() << "Node was eliminated by CSE\n"); - return nullptr; + assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE && + "NodeToMatch was removed partway through selection"); + SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N, + SDNode *E) { + auto &Chain = ChainNodesMatched; + assert((!E || llvm::find(Chain, N) == Chain.end()) && + "Chain node replaced during MorphNode"); + Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end()); + }); + Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); } // If the node had chain/glue results, update our notion of the current @@ -3285,31 +3453,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } DEBUG(dbgs() << " " - << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") + << (IsMorphNodeTo ? "Morphed" : "Created") << " node: "; Res->dump(CurDAG); dbgs() << "\n"); // If this was a MorphNodeTo then we're completely done! - if (Opcode == OPC_MorphNodeTo) { - // Update chain and glue uses. - UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, - InputGlue, GlueResultNodesMatched, true); - return Res; - } - - continue; - } - - case OPC_MarkGlueResults: { - unsigned NumNodes = MatcherTable[MatcherIndex++]; - - // Read and remember all the glue-result nodes. - for (unsigned i = 0; i != NumNodes; ++i) { - unsigned RecNo = MatcherTable[MatcherIndex++]; - if (RecNo & 128) - RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); - - assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults"); - GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + if (IsMorphNodeTo) { + // Update chain uses. + UpdateChains(Res, InputChain, ChainNodesMatched, true); + return; } continue; } @@ -3341,20 +3492,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); } - // If the root node defines glue, add it to the glue nodes to update list. - if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue) - GlueResultNodesMatched.push_back(NodeToMatch); + // Update chain uses. + UpdateChains(NodeToMatch, InputChain, ChainNodesMatched, false); - // Update chain and glue uses. 
-  UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
-                      InputGlue, GlueResultNodesMatched, false);
+  // If the root node defines glue, we need to update it to the glue result.
+  // TODO: This never happens in our tests and I think it can be removed /
+  // replaced with an assert, but if we do it this way the change is
+  // NFC.
+  if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) ==
+          MVT::Glue &&
+      InputGlue.getNode())
+    CurDAG->ReplaceAllUsesOfValueWith(
+        SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue);

  assert(NodeToMatch->use_empty() &&
         "Didn't replace all uses of the node?");
+  CurDAG->RemoveDeadNode(NodeToMatch);

-    // FIXME: We just return here, which interacts correctly with SelectRoot
-    // above. We should fix this to not return an SDNode* anymore.
-    return nullptr;
+    return;
  }
}

@@ -3366,7 +3521,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
  while (1) {
    if (MatchScopes.empty()) {
      CannotYetSelect(NodeToMatch);
-      return nullptr;
+      return;
    }

    // Restore the interpreter state back to the point where the scope was
@@ -3387,8 +3542,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
    InputGlue = LastScope.InputGlue;
    if (!LastScope.HasChainNodesMatched)
      ChainNodesMatched.clear();
-    if (!LastScope.HasGlueResultNodesMatched)
-      GlueResultNodesMatched.clear();

    // Check to see what the offset is at the new MatcherIndex. If it is zero
    // we have reached the end of this scope, otherwise we have another child
@@ -3411,8 +3564,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
  }
}

-
-
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
  std::string msg;
  raw_string_ostream Msg(msg);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
index 00db942..55f70f7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//===-- SelectionDAGTargetInfo.cpp - SelectionDAG Info --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
@@ -7,13 +7,11 @@
//
-// This implements the TargetSelectionDAGInfo class.
+// This implements the SelectionDAGTargetInfo class.
//
//===----------------------------------------------------------------------===//

-#include "llvm/Target/TargetSelectionDAGInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
using namespace llvm;

-TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
-}
+SelectionDAGTargetInfo::~SelectionDAGTargetInfo() {}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 02545a7..90aaba2 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -53,13 +53,10 @@ void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
         "Trying to visit statepoint before finished processing previous one");
  Locations.clear();
  NextSlotToAllocate = 0;
-  // Need to resize this on each safepoint - we need the two to stay in
-  // sync and the clear patterns of a SelectionDAGBuilder have no relation
-  // to FunctionLoweringInfo.
+ // Need to resize this on each safepoint - we need the two to stay in sync and + // the clear patterns of a SelectionDAGBuilder have no relation to + // FunctionLoweringInfo. SmallBitVector::reset initializes all bits to false. AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size()); - for (size_t i = 0; i < AllocatedStackSlots.size(); i++) { - AllocatedStackSlots[i] = false; - } } void StatepointLoweringState::clear() { @@ -72,49 +69,46 @@ void StatepointLoweringState::clear() { SDValue StatepointLoweringState::allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder) { - NumSlotsAllocatedForStatepoints++; + auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - // The basic scheme here is to first look for a previously created stack slot - // which is not in use (accounting for the fact arbitrary slots may already - // be reserved), or to create a new stack slot and use it. - - // If this doesn't succeed in 40000 iterations, something is seriously wrong - for (int i = 0; i < 40000; i++) { - assert(Builder.FuncInfo.StatepointStackSlots.size() == - AllocatedStackSlots.size() && - "broken invariant"); - const size_t NumSlots = AllocatedStackSlots.size(); - assert(NextSlotToAllocate <= NumSlots && "broken invariant"); - - if (NextSlotToAllocate >= NumSlots) { - assert(NextSlotToAllocate == NumSlots); - // record stats - if (NumSlots + 1 > StatepointMaxSlotsRequired) { - StatepointMaxSlotsRequired = NumSlots + 1; - } + unsigned SpillSize = ValueType.getSizeInBits() / 8; + assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?"); - SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); - const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); - auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - MFI->markAsStatepointSpillSlotObjectIndex(FI); + // First look for a previously created stack slot which is not in + // use (accounting for the fact arbitrary slots may already be + // reserved), or to create a new stack slot and use it. - Builder.FuncInfo.StatepointStackSlots.push_back(FI); - AllocatedStackSlots.push_back(true); - return SpillSlot; - } - if (!AllocatedStackSlots[NextSlotToAllocate]) { + const size_t NumSlots = AllocatedStackSlots.size(); + assert(NextSlotToAllocate <= NumSlots && "Broken invariant"); + + // The stack slots in StatepointStackSlots beyond the first NumSlots were + // added in this instance of StatepointLoweringState, and cannot be re-used. + assert(NumSlots <= Builder.FuncInfo.StatepointStackSlots.size() && + "Broken invariant"); + + for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) { + if (!AllocatedStackSlots.test(NextSlotToAllocate)) { const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate]; - AllocatedStackSlots[NextSlotToAllocate] = true; - return Builder.DAG.getFrameIndex(FI, ValueType); + if (MFI->getObjectSize(FI) == SpillSize) { + AllocatedStackSlots.set(NextSlotToAllocate); + return Builder.DAG.getFrameIndex(FI, ValueType); + } } - // Note: We deliberately choose to advance this only on the failing path. - // Doing so on the succeeding path involves a bit of complexity that caused - // a minor bug previously. Unless performance shows this matters, please - // keep this code as simple as possible. 
- NextSlotToAllocate++; } - llvm_unreachable("infinite loop?"); + + // Couldn't find a free slot, so create a new one: + + SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); + const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + MFI->markAsStatepointSpillSlotObjectIndex(FI); + + Builder.FuncInfo.StatepointStackSlots.push_back(FI); + + StatepointMaxSlotsRequired = std::max<unsigned long>( + StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size()); + + return SpillSlot; } /// Utility function for reservePreviousStackSlotForValue. Tries to find @@ -125,24 +119,23 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, int LookUpDepth) { // Can not look any further - give up now if (LookUpDepth <= 0) - return Optional<int>(); + return None; // Spill location is known for gc relocates if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { - FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = - Builder.FuncInfo.StatepointRelocatedValues[Relocate->getStatepoint()]; + const auto &SpillMap = + Builder.FuncInfo.StatepointSpillMaps[Relocate->getStatepoint()]; auto It = SpillMap.find(Relocate->getDerivedPtr()); if (It == SpillMap.end()) - return Optional<int>(); + return None; return It->second; } // Look through bitcast instructions. - if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val)) { + if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val)) return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1); - } // Look through phi nodes // All incoming values should have same known stack slot, otherwise result @@ -154,10 +147,10 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, Optional<int> SpillSlot = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); if (!SpillSlot.hasValue()) - return Optional<int>(); + return None; if (MergedResult.hasValue() && *MergedResult != *SpillSlot) - return Optional<int>(); + return None; MergedResult = SpillSlot; } @@ -192,7 +185,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, // which we visit values is unspecified. // Don't know any information about this instruction - return Optional<int>(); + return None; } /// Try to find existing copies of the incoming values in stack slots used for @@ -213,7 +206,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming); if (OldLocation.getNode()) - // duplicates in input + // Duplicates in input return; const int LookUpDepth = 6; @@ -222,14 +215,14 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, if (!Index.hasValue()) return; - auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(), - Builder.FuncInfo.StatepointStackSlots.end(), *Index); - assert(Itr != Builder.FuncInfo.StatepointStackSlots.end() && - "value spilled to the unknown stack slot"); + const auto &StatepointSlots = Builder.FuncInfo.StatepointStackSlots; + + auto SlotIt = find(StatepointSlots, *Index); + assert(SlotIt != StatepointSlots.end() && + "Value spilled to the unknown stack slot"); // This is one of our dedicated lowering slots - const int Offset = - std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr); + const int Offset = std::distance(StatepointSlots.begin(), SlotIt); if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) { // stack slot already assigned to someone else, can't use it! 
// TODO: currently we reserve space for gc arguments after doing @@ -252,24 +245,30 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, /// is not required for correctness. It's purpose is to reduce the size of /// StackMap section. It has no effect on the number of spill slots required /// or the actual lowering. -static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, - SmallVectorImpl<const Value *> &Ptrs, - SmallVectorImpl<const Value *> &Relocs, - SelectionDAGBuilder &Builder) { - - // This is horribly inefficient, but I don't care right now - SmallSet<SDValue, 64> Seen; - - SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs; - for (size_t i = 0; i < Ptrs.size(); i++) { +static void +removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases, + SmallVectorImpl<const Value *> &Ptrs, + SmallVectorImpl<const GCRelocateInst *> &Relocs, + SelectionDAGBuilder &Builder, + FunctionLoweringInfo::StatepointSpillMap &SSM) { + DenseMap<SDValue, const Value *> Seen; + + SmallVector<const Value *, 64> NewBases, NewPtrs; + SmallVector<const GCRelocateInst *, 64> NewRelocs; + for (size_t i = 0, e = Ptrs.size(); i < e; i++) { SDValue SD = Builder.getValue(Ptrs[i]); - // Only add non-duplicates - if (Seen.count(SD) == 0) { + auto SeenIt = Seen.find(SD); + + if (SeenIt == Seen.end()) { + // Only add non-duplicates NewBases.push_back(Bases[i]); NewPtrs.push_back(Ptrs[i]); NewRelocs.push_back(Relocs[i]); + Seen[SD] = Ptrs[i]; + } else { + // Duplicate pointer found, note in SSM and move on: + SSM.DuplicateMap[Ptrs[i]] = SeenIt->second; } - Seen.insert(SD); } assert(Bases.size() >= NewBases.size()); assert(Ptrs.size() >= NewPtrs.size()); @@ -284,43 +283,13 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, /// Extract call from statepoint, lower it and return pointer to the /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result -static SDNode * -lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, - SelectionDAGBuilder &Builder, - SmallVectorImpl<SDValue> &PendingExports) { - - ImmutableCallSite CS(ISP.getCallSite()); - - SDValue ActualCallee; - - if (ISP.getNumPatchBytes() > 0) { - // If we've been asked to emit a nop sequence instead of a call instruction - // for this statepoint then don't lower the call target, but use a constant - // `null` instead. Not lowering the call target lets statepoint clients get - // away without providing a physical address for the symbolic call target at - // link time. 
- - const auto &TLI = Builder.DAG.getTargetLoweringInfo(); - const auto &DL = Builder.DAG.getDataLayout(); - - unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); - ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(), - TLI.getPointerTy(DL, AS)); - } else - ActualCallee = Builder.getValue(ISP.getCalledValue()); - - assert(CS.getCallingConv() != CallingConv::AnyReg && - "anyregcc is not supported on statepoints!"); - - Type *DefTy = ISP.getActualReturnType(); - bool HasDef = !DefTy->isVoidTy(); +static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo( + SelectionDAGBuilder::StatepointLoweringInfo &SI, + SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { SDValue ReturnValue, CallEndVal; - std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( - ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, - ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB, - false /* IsPatchPoint */); - + std::tie(ReturnValue, CallEndVal) = + Builder.lowerInvokable(SI.CLI, SI.EHPadBB); SDNode *CallEnd = CallEndVal.getNode(); // Get a call instruction from the call sequence chain. Tail calls are not @@ -339,6 +308,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, // to grab the return value from the return register(s), or it can be a LOAD // to load a value returned by reference via a stack slot. + bool HasDef = !SI.CLI.RetTy->isVoidTy(); if (HasDef) { if (CallEnd->getOpcode() == ISD::LOAD) CallEnd = CallEnd->getOperand(0).getNode(); @@ -348,70 +318,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, } assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); - - // Export the result value if needed - const Instruction *GCResult = ISP.getGCResult(); - if (HasDef && GCResult) { - if (GCResult->getParent() != CS.getParent()) { - // Result value will be used in a different basic block so we need to - // export it now. - // Default exporting mechanism will not work here because statepoint call - // has a different type than the actual call. It means that by default - // llvm will create export register of the wrong type (always i32 in our - // case). So instead we need to create export register with correct type - // manually. - // TODO: To eliminate this problem we can remove gc.result intrinsics - // completely and make statepoint call to return a tuple. - unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); - RegsForValue RFV( - *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), - Builder.DAG.getDataLayout(), Reg, ISP.getActualReturnType()); - SDValue Chain = Builder.DAG.getEntryNode(); - - RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain, - nullptr); - PendingExports.push_back(Chain); - Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; - } else { - // Result value will be used in a same basic block. Don't export it or - // perform any explicit register copies. - // We'll replace the actuall call node shortly. gc_result will grab - // this value. 
- Builder.setValue(CS.getInstruction(), ReturnValue); - } - } else { - // The token value is never used from here on, just generate a poison value - Builder.setValue(CS.getInstruction(), - Builder.DAG.getIntPtrConstant(-1, Builder.getCurSDLoc())); - } - - return CallEnd->getOperand(0).getNode(); -} - -/// Callect all gc pointers coming into statepoint intrinsic, clean them up, -/// and return two arrays: -/// Bases - base pointers incoming to this statepoint -/// Ptrs - derived pointers incoming to this statepoint -/// Relocs - the gc_relocate corresponding to each base/ptr pair -/// Elements of this arrays should be in one-to-one correspondence with each -/// other i.e Bases[i], Ptrs[i] are from the same gcrelocate call -static void getIncomingStatepointGCValues( - SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs, - SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite, - SelectionDAGBuilder &Builder) { - for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) { - Relocs.push_back(Relocate); - Bases.push_back(Relocate->getBasePtr()); - Ptrs.push_back(Relocate->getDerivedPtr()); - } - - // Remove any redundant llvm::Values which map to the same SDValue as another - // input. Also has the effect of removing duplicates in the original - // llvm::Value input list as well. This is a useful optimization for - // reducing the size of the StackMap section. It has no other impact. - removeDuplicatesGCPtrs(Bases, Ptrs, Relocs, Builder); - - assert(Bases.size() == Ptrs.size() && Ptrs.size() == Relocs.size()); + return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode()); } /// Spill a value incoming to the statepoint. It might be either part of @@ -429,7 +336,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, if (!Loc.getNode()) { Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(), Builder); - assert(isa<FrameIndexSDNode>(Loc)); int Index = cast<FrameIndexSDNode>(Loc)->getIndex(); // We use TargetFrameIndex so that isel will not select it into LEA Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); @@ -437,10 +343,22 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // TODO: We can create TokenFactor node instead of // chaining stores one after another, this may allow // a bit more optimal scheduling for them + +#ifndef NDEBUG + // Right now we always allocate spill slots that are of the same + // size as the value we're about to spill (the size of spillee can + // vary since we spill vectors of pointers too). At some point we + // can consider allowing spills of smaller values to larger slots + // (i.e. change the '==' in the assert below to a '>='). + auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + assert((MFI->getObjectSize(Index) * 8) == + Incoming.getValueType().getSizeInBits() && + "Bad spill: stack slot does not match!"); +#endif + Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, MachinePointerInfo::getFixedStack( - Builder.DAG.getMachineFunction(), Index), - false, false, 0); + Builder.DAG.getMachineFunction(), Index)); Builder.StatepointLowering.setLocation(Incoming, Loc); } @@ -478,8 +396,7 @@ static void lowerIncomingStatepointValue(SDValue Incoming, // spill location. This would be a useful optimization, but would // need to be optional since it requires a lot of complexity on the // runtime side which not all would support. 
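  // (Editorial note: spillIncomingStatepointValue, called below, either
  // reuses the location already cached for this SDValue or allocates a
  // slot via allocateStackSlot, emits the store, and records the location
  // so later uses within the same statepoint find it again.)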
- std::pair<SDValue, SDValue> Res = - spillIncomingStatepointValue(Incoming, Chain, Builder); + auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder); Ops.push_back(Res.first); Chain = Res.second; } @@ -494,43 +411,37 @@ static void lowerIncomingStatepointValue(SDValue Incoming, /// completion, 'Ops' will contain ready to use operands for machine code /// statepoint. The chain nodes will have already been created and the DAG root /// will be set to the last value spilled (if any were). -static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, - ImmutableStatepoint StatepointSite, - SelectionDAGBuilder &Builder) { - - // Lower the deopt and gc arguments for this statepoint. Layout will - // be: deopt argument length, deopt arguments.., gc arguments... - - SmallVector<const Value *, 64> Bases, Ptrs, Relocations; - getIncomingStatepointGCValues(Bases, Ptrs, Relocations, StatepointSite, - Builder); - +static void +lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, + SelectionDAGBuilder::StatepointLoweringInfo &SI, + SelectionDAGBuilder &Builder) { + // Lower the deopt and gc arguments for this statepoint. Layout will be: + // deopt argument length, deopt arguments.., gc arguments... #ifndef NDEBUG - // Check that each of the gc pointer and bases we've gotten out of the - // safepoint is something the strategy thinks might be a pointer (or vector - // of pointers) into the GC heap. This is basically just here to help catch - // errors during statepoint insertion. TODO: This should actually be in the - // Verifier, but we can't get to the GCStrategy from there (yet). - GCStrategy &S = Builder.GFI->getStrategy(); - for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed base pointer found in statepoint"); - } - } - for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt.hasValue()) { - assert(Opt.getValue() && - "non gc managed derived pointer found in statepoint"); + if (auto *GFI = Builder.GFI) { + // Check that each of the gc pointer and bases we've gotten out of the + // safepoint is something the strategy thinks might be a pointer (or vector + // of pointers) into the GC heap. This is basically just here to help catch + // errors during statepoint insertion. TODO: This should actually be in the + // Verifier, but we can't get to the GCStrategy from there (yet). + GCStrategy &S = GFI->getStrategy(); + for (const Value *V : SI.Bases) { + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); + } } - } - for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt.hasValue()) { - assert(Opt.getValue() && "non gc managed pointer relocated"); + for (const Value *V : SI.Ptrs) { + auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); + } } + } else { + assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!"); + assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!"); } #endif @@ -539,30 +450,23 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // particular value. This is purely an optimization over the code below and // doesn't change semantics at all. 
It is important for performance that we // reserve slots for both deopt and gc values before lowering either. - for (const Value *V : StatepointSite.vm_state_args()) { + for (const Value *V : SI.DeoptState) { reservePreviousStackSlotForValue(V, Builder); } - for (unsigned i = 0; i < Bases.size(); ++i) { - reservePreviousStackSlotForValue(Bases[i], Builder); - reservePreviousStackSlotForValue(Ptrs[i], Builder); + for (unsigned i = 0; i < SI.Bases.size(); ++i) { + reservePreviousStackSlotForValue(SI.Bases[i], Builder); + reservePreviousStackSlotForValue(SI.Ptrs[i], Builder); } // First, prefix the list with the number of unique values to be // lowered. Note that this is the number of *Values* not the // number of SDValues required to lower them. - const int NumVMSArgs = StatepointSite.getNumTotalVMSArgs(); + const int NumVMSArgs = SI.DeoptState.size(); pushStackMapConstant(Ops, Builder, NumVMSArgs); - assert(NumVMSArgs == std::distance(StatepointSite.vm_state_begin(), - StatepointSite.vm_state_end())); - - // The vm state arguments are lowered in an opaque manner. We do - // not know what type of values are contained within. We skip the - // first one since that happens to be the total number we lowered - // explicitly just above. We could have left it in the loop and - // not done it explicitly, but it's far easier to understand this - // way. - for (const Value *V : StatepointSite.vm_state_args()) { + // The vm state arguments are lowered in an opaque manner. We do not know + // what type of values are contained within. + for (const Value *V : SI.DeoptState) { SDValue Incoming = Builder.getValue(V); lowerIncomingStatepointValue(Incoming, Ops, Builder); } @@ -572,11 +476,11 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // arrays interwoven with each (lowered) base pointer immediately followed by // it's (lowered) derived pointer. i.e // (base[0], ptr[0], base[1], ptr[1], ...) - for (unsigned i = 0; i < Bases.size(); ++i) { - const Value *Base = Bases[i]; + for (unsigned i = 0; i < SI.Bases.size(); ++i) { + const Value *Base = SI.Bases[i]; lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder); - const Value *Ptr = Ptrs[i]; + const Value *Ptr = SI.Ptrs[i]; lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder); } @@ -585,7 +489,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // allocas and give control over placement to the consumer. In this case, // it is the contents of the slot which may get updated, not the pointer to // the alloca - for (Value *V : StatepointSite.gc_args()) { + for (Value *V : SI.GCArgs) { SDValue Incoming = Builder.getValue(V); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { // This handles allocas as arguments to the statepoint @@ -597,18 +501,16 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // Record computed locations for all lowered values. // This can not be embedded in lowering loops as we need to record *all* // values, while previous loops account only values with unique SDValues. 
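  // (Editorial note: SpillMap.SlotMap ends up mapping each relocated derived
  // pointer either to the frame index of its spill slot, or to None for
  // values such as allocas and constants that never get a spill slot.)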
- const Instruction *StatepointInstr = - StatepointSite.getCallSite().getInstruction(); - FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = - Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr]; + const Instruction *StatepointInstr = SI.StatepointInstr; + auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr]; - for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) { + for (const GCRelocateInst *Relocate : SI.GCRelocates) { const Value *V = Relocate->getDerivedPtr(); SDValue SDV = Builder.getValue(V); SDValue Loc = Builder.StatepointLowering.getLocation(SDV); if (Loc.getNode()) { - SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); + SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is case for allocas // and constants. For this values we can avoid emitting spill load while @@ -616,7 +518,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // Actually we do not need to record them in this map at all. // We do this only to check that we are not relocating any unvisited // value. - SpillMap[V] = None; + SpillMap.SlotMap[V] = None; // Default llvm mechanisms for exporting values which are used in // different basic blocks does not work for gc relocates. @@ -630,16 +532,8 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, } } -void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { - // Check some preconditions for sanity - assert(isStatepoint(&CI) && - "function called must be the statepoint function"); - - LowerStatepoint(ImmutableStatepoint(&CI)); -} - -void SelectionDAGBuilder::LowerStatepoint( - ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) { +SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( + SelectionDAGBuilder::StatepointLoweringInfo &SI) { // The basic scheme here is that information about both the original call and // the safepoint is encoded in the CallInst. We create a temporary call and // lower it, then reverse engineer the calling sequence. @@ -648,36 +542,36 @@ void SelectionDAGBuilder::LowerStatepoint( // Clear state StatepointLowering.startNewStatepoint(*this); - ImmutableCallSite CS(ISP.getCallSite()); - #ifndef NDEBUG - // Consistency check. Check only relocates in the same basic block as thier - // statepoint. - for (const User *U : CS->users()) { - const CallInst *Call = cast<CallInst>(U); - if (isa<GCRelocateInst>(Call) && Call->getParent() == CS.getParent()) - StatepointLowering.scheduleRelocCall(*Call); - } + // We schedule gc relocates before removeDuplicateGCPtrs since we _will_ + // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs. + for (auto *Reloc : SI.GCRelocates) + if (Reloc->getParent() == SI.StatepointInstr->getParent()) + StatepointLowering.scheduleRelocCall(*Reloc); #endif -#ifndef NDEBUG - // If this is a malformed statepoint, report it early to simplify debugging. - // This should catch any IR level mistake that's made when constructing or - // transforming statepoints. - ISP.verify(); - - // Check that the associated GCStrategy expects to encounter statepoints. - assert(GFI->getStrategy().useStatepoints() && - "GCStrategy does not expect to encounter statepoints"); -#endif + // Remove any redundant llvm::Values which map to the same SDValue as another + // input. Also has the effect of removing duplicates in the original + // llvm::Value input list as well. This is a useful optimization for + // reducing the size of the StackMap section. 
It has no other impact. + removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this, + FuncInfo.StatepointSpillMaps[SI.StatepointInstr]); + assert(SI.Bases.size() == SI.Ptrs.size() && + SI.Ptrs.size() == SI.GCRelocates.size()); // Lower statepoint vmstate and gcstate arguments SmallVector<SDValue, 10> LoweredMetaArgs; - lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this); + lowerStatepointMetaArgs(LoweredMetaArgs, SI, *this); + + // Now that we've emitted the spills, we need to update the root so that the + // call sequence is ordered correctly. + SI.CLI.setChain(getRoot()); // Get call node, we will replace it later with statepoint - SDNode *CallNode = - lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports); + SDValue ReturnVal; + SDNode *CallNode; + std::tie(ReturnVal, CallNode) = + lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. @@ -700,8 +594,8 @@ void SelectionDAGBuilder::LowerStatepoint( // followed by a SRCVALUE for the pointer that may be used during lowering // (e.g. to form MachinePointerInfo values for loads/stores). const bool IsGCTransition = - (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) == - (uint64_t)StatepointFlags::GCTransition; + (SI.StatepointFlags & (uint64_t)StatepointFlags::GCTransition) == + (uint64_t)StatepointFlags::GCTransition; if (IsGCTransition) { SmallVector<SDValue, 8> TSOps; @@ -709,7 +603,7 @@ void SelectionDAGBuilder::LowerStatepoint( TSOps.push_back(Chain); // Add GC transition arguments - for (const Value *V : ISP.gc_transition_args()) { + for (const Value *V : SI.GCTransitionArgs) { TSOps.push_back(getValue(V)); if (V->getType()->isPointerTy()) TSOps.push_back(DAG.getSrcValue(V)); @@ -734,9 +628,9 @@ void SelectionDAGBuilder::LowerStatepoint( SmallVector<SDValue, 40> Ops; // Add the <id> and <numBytes> constants. 
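  // (Editorial summary of the operand list assembled below: <id> and
  // <numBytes>, the number of call arguments, the call target and its
  // arguments, the calling convention, the statepoint flags, the number of
  // deopt (vmstate) arguments followed by the arguments themselves, and
  // finally the gc values as interleaved base/derived pairs.)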
- Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64)); + Ops.push_back(DAG.getTargetConstant(SI.ID, getCurSDLoc(), MVT::i64)); Ops.push_back( - DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32)); + DAG.getTargetConstant(SI.NumPatchBytes, getCurSDLoc(), MVT::i32)); // Calculate and push starting position of vmstate arguments // Get number of arguments incoming directly into call node @@ -758,13 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint( Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt); // Add a constant argument for the calling convention - pushStackMapConstant(Ops, *this, CS.getCallingConv()); + pushStackMapConstant(Ops, *this, SI.CLI.CallConv); // Add a constant argument for the flags - uint64_t Flags = ISP.getFlags(); - assert( - ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) - && "unknown flag used"); + uint64_t Flags = SI.StatepointFlags; + assert(((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) && + "Unknown flag used"); pushStackMapConstant(Ops, *this, Flags); // Insert all vmstate and gcstate arguments @@ -800,7 +693,7 @@ void SelectionDAGBuilder::LowerStatepoint( TEOps.push_back(SDValue(StatepointMCNode, 0)); // Add GC transition arguments - for (const Value *V : ISP.gc_transition_args()) { + for (const Value *V : SI.GCTransitionArgs) { TEOps.push_back(getValue(V)); if (V->getType()->isPointerTy()) TEOps.push_back(DAG.getSrcValue(V)); @@ -830,19 +723,154 @@ void SelectionDAGBuilder::LowerStatepoint( // return value of each gc.relocate to the respective output of the // previously emitted STATEPOINT value. Unfortunately, this doesn't appear // to actually be possible today. + + return ReturnVal; +} + +void +SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, + const BasicBlock *EHPadBB /*= nullptr*/) { + assert(ISP.getCallSite().getCallingConv() != CallingConv::AnyReg && + "anyregcc is not supported on statepoints!"); + +#ifndef NDEBUG + // If this is a malformed statepoint, report it early to simplify debugging. + // This should catch any IR level mistake that's made when constructing or + // transforming statepoints. + ISP.verify(); + + // Check that the associated GCStrategy expects to encounter statepoints. + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); +#endif + + SDValue ActualCallee; + + if (ISP.getNumPatchBytes() > 0) { + // If we've been asked to emit a nop sequence instead of a call instruction + // for this statepoint then don't lower the call target, but use a constant + // `null` instead. Not lowering the call target lets statepoint clients get + // away without providing a physical address for the symbolic call target at + // link time. 
+ + const auto &TLI = DAG.getTargetLoweringInfo(); + const auto &DL = DAG.getDataLayout(); + + unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); + ActualCallee = DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DL, AS)); + } else { + ActualCallee = getValue(ISP.getCalledValue()); + } + + StatepointLoweringInfo SI(DAG); + populateCallLoweringInfo(SI.CLI, ISP.getCallSite(), + ImmutableStatepoint::CallArgsBeginPos, + ISP.getNumCallArgs(), ActualCallee, + ISP.getActualReturnType(), false /* IsPatchPoint */); + + for (const GCRelocateInst *Relocate : ISP.getRelocates()) { + SI.GCRelocates.push_back(Relocate); + SI.Bases.push_back(Relocate->getBasePtr()); + SI.Ptrs.push_back(Relocate->getDerivedPtr()); + } + + SI.GCArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); + SI.StatepointInstr = ISP.getInstruction(); + SI.GCTransitionArgs = + ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); + SI.ID = ISP.getID(); + SI.DeoptState = ArrayRef<const Use>(ISP.vm_state_begin(), ISP.vm_state_end()); + SI.StatepointFlags = ISP.getFlags(); + SI.NumPatchBytes = ISP.getNumPatchBytes(); + SI.EHPadBB = EHPadBB; + + SDValue ReturnValue = LowerAsSTATEPOINT(SI); + + // Export the result value if needed + const GCResultInst *GCResult = ISP.getGCResult(); + Type *RetTy = ISP.getActualReturnType(); + if (!RetTy->isVoidTy() && GCResult) { + if (GCResult->getParent() != ISP.getCallSite().getParent()) { + // Result value will be used in a different basic block so we need to + // export it now. Default exporting mechanism will not work here because + // statepoint call has a different type than the actual call. It means + // that by default llvm will create export register of the wrong type + // (always i32 in our case). So instead we need to create export register + // with correct type manually. + // TODO: To eliminate this problem we can remove gc.result intrinsics + // completely and make statepoint call to return a tuple. + unsigned Reg = FuncInfo.CreateRegs(RetTy); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), Reg, RetTy); + SDValue Chain = DAG.getEntryNode(); + + RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); + PendingExports.push_back(Chain); + FuncInfo.ValueMap[ISP.getInstruction()] = Reg; + } else { + // Result value will be used in the same basic block. Don't export it or + // perform any explicit register copies. + // We'll replace the actual call node shortly. gc_result will grab + // this value. + setValue(ISP.getInstruction(), ReturnValue); + } + } else { + // The token value is never used from here on, just generate a poison value + setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc())); + } +} + +void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( + ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB, + bool VarArgDisallowed, bool ForceVoidReturnTy) { + StatepointLoweringInfo SI(DAG); + unsigned ArgBeginIndex = CS.arg_begin() - CS.getInstruction()->op_begin(); + populateCallLoweringInfo( + SI.CLI, CS, ArgBeginIndex, CS.getNumArgOperands(), Callee, + ForceVoidReturnTy ?
Type::getVoidTy(*DAG.getContext()) : CS.getType(), + false); + if (!VarArgDisallowed) + SI.CLI.IsVarArg = CS.getFunctionType()->isVarArg(); + + auto DeoptBundle = *CS.getOperandBundle(LLVMContext::OB_deopt); + + unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID; + + auto SD = parseStatepointDirectivesFromAttrs(CS.getAttributes()); + SI.ID = SD.StatepointID.getValueOr(DefaultID); + SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0); + + SI.DeoptState = + ArrayRef<const Use>(DeoptBundle.Inputs.begin(), DeoptBundle.Inputs.end()); + SI.StatepointFlags = static_cast<uint64_t>(StatepointFlags::None); + SI.EHPadBB = EHPadBB; + + // NB! The GC arguments are deliberately left empty. + + if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) { + const Instruction *Inst = CS.getInstruction(); + ReturnVal = lowerRangeToAssertZExt(DAG, *Inst, ReturnVal); + setValue(Inst, ReturnVal); + } } -void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { +void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle( + ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB) { + LowerCallSiteWithDeoptBundleImpl(CS, Callee, EHPadBB, + /* VarArgDisallowed = */ false, + /* ForceVoidReturnTy = */ false); +} + +void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { // The result value of the gc_result is simply the result of the actual // call. We've already emitted this, so just grab the value. - Instruction *I = cast<Instruction>(CI.getArgOperand(0)); - assert(isStatepoint(I) && "first argument must be a statepoint token"); + const Instruction *I = CI.getStatepoint(); if (I->getParent() != CI.getParent()) { // Statepoint is in a different basic block so we should have stored the // call result in a virtual register. // We can not use default getValue() functionality to copy value from this - // register because statepoint and actuall call return types can be + // register because statepoint and actual call return types can be // different, and getValue() will use CopyFromReg of the wrong type, // which is always i32 in our case. PointerType *CalleeType = cast<PointerType>( @@ -864,20 +892,21 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { // We skip this check for relocates not in the same basic block as their // statepoint. It would be too expensive to preserve validation info through // different basic blocks. - if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) { + if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) StatepointLowering.relocCallVisited(Relocate); - } + + auto *Ty = Relocate.getType()->getScalarType(); + if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty)) + assert(*IsManaged && "Non gc managed pointer relocated!"); #endif const Value *DerivedPtr = Relocate.getDerivedPtr(); SDValue SD = getValue(DerivedPtr); - FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = - FuncInfo.StatepointRelocatedValues[Relocate.getStatepoint()]; - - // We should have recorded location for this pointer - assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value"); - Optional<int> DerivedPtrLocation = SpillMap[DerivedPtr]; + auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()]; + auto SlotIt = SpillMap.find(DerivedPtr); + assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value"); + Optional<int> DerivedPtrLocation = SlotIt->second; // We didn't need to spill these special cases (constants and allocas). // See the handling in spillIncomingValueForStatepoint for detail.
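The spill-map contract used above deserves spelling out: every derived pointer a gc.relocate names must already have an entry in the per-statepoint map, and an empty Optional marks a value that was visited but never spilled (allocas and constants). Below is a minimal standalone sketch of that contract, with std::map and std::optional standing in for the DenseMap-based spill map and llvm::Optional; the names are illustrative, not the LLVM API.

#include <cassert>
#include <map>
#include <optional>

struct Value {}; // stand-in for llvm::Value

int main() {
  // Per-statepoint map: derived pointer -> frame index, or nullopt if the
  // value was visited but needed no spill slot.
  std::map<const Value *, std::optional<int>> SlotMap;

  Value Spilled, NotSpilled;
  SlotMap[&Spilled] = 42;              // lowered and spilled to frame index 42
  SlotMap[&NotSpilled] = std::nullopt; // visited, but not spilled

  // A gc.relocate may only name pointers the statepoint actually lowered.
  auto It = SlotMap.find(&Spilled);
  assert(It != SlotMap.end() && "Relocating not lowered gc value");
  if (It->second)
    assert(*It->second == 42); // the reload happens from this frame index
  return 0;
}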
@@ -897,8 +926,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { SDValue SpillLoad = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), - *DerivedPtrLocation), - false, false, false, 0); + *DerivedPtrLocation)); // Again, be conservative, don't emit pending loads DAG.setRoot(SpillLoad.getValue(1)); @@ -906,3 +934,25 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { assert(SpillLoad.getNode()); setValue(&Relocate, SpillLoad); } + +void SelectionDAGBuilder::LowerDeoptimizeCall(const CallInst *CI) { + const auto &TLI = DAG.getTargetLoweringInfo(); + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::DEOPTIMIZE), + TLI.getPointerTy(DAG.getDataLayout())); + + // We don't lower calls to __llvm_deoptimize as varargs, but as a regular + // call. We also do not lower the return value to any virtual register, and + // change the immediately following return to a trap instruction. + LowerCallSiteWithDeoptBundleImpl(CI, Callee, /* EHPadBB = */ nullptr, + /* VarArgDisallowed = */ true, + /* ForceVoidReturnTy = */ true); +} + +void SelectionDAGBuilder::LowerDeoptimizingReturn() { + // We do not lower the return value from llvm.deoptimize to any virtual + // register, and change the immediately following return to a trap + // instruction. + if (DAG.getTarget().Options.TrapUnreachable) + DAG.setRoot( + DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h index 82d0c62..b043184 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -16,9 +16,9 @@ #define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include <vector> namespace llvm { class SelectionDAGBuilder; @@ -45,15 +45,17 @@ public: /// statepoint. Will return SDValue() if this value hasn't been /// spilled. Otherwise, the value has already been spilled and no /// further action is required by the caller. - SDValue getLocation(SDValue val) { - if (!Locations.count(val)) + SDValue getLocation(SDValue Val) { + auto I = Locations.find(Val); + if (I == Locations.end()) return SDValue(); - return Locations[val]; + return I->second; } - void setLocation(SDValue val, SDValue Location) { - assert(!Locations.count(val) && + + void setLocation(SDValue Val, SDValue Location) { + assert(!Locations.count(Val) && "Trying to allocate already allocated location"); - Locations[val] = Location; + Locations[Val] = Location; } /// Record the fact that we expect to encounter a given gc_relocate @@ -62,16 +64,15 @@ public: void scheduleRelocCall(const CallInst &RelocCall) { PendingGCRelocateCalls.push_back(&RelocCall); } + /// Remove this gc_relocate from the list we're expecting to see /// before the next statepoint. If we weren't expecting to see /// it, we'll report an assertion. 
void relocCallVisited(const CallInst &RelocCall) { - SmallVectorImpl<const CallInst *>::iterator itr = - std::find(PendingGCRelocateCalls.begin(), PendingGCRelocateCalls.end(), - &RelocCall); - assert(itr != PendingGCRelocateCalls.end() && + auto I = find(PendingGCRelocateCalls, &RelocCall); + assert(I != PendingGCRelocateCalls.end() && "Visited unexpected gcrelocate call"); - PendingGCRelocateCalls.erase(itr); + PendingGCRelocateCalls.erase(I); } // TODO: Should add consistency tracking to ensure we encounter @@ -84,14 +85,15 @@ public: void reserveStackSlot(int Offset) { assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && "out of bounds"); - assert(!AllocatedStackSlots[Offset] && "already reserved!"); + assert(!AllocatedStackSlots.test(Offset) && "already reserved!"); assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!"); - AllocatedStackSlots[Offset] = true; + AllocatedStackSlots.set(Offset); } + bool isStackSlotAllocated(int Offset) { assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && "out of bounds"); - return AllocatedStackSlots[Offset]; + return AllocatedStackSlots.test(Offset); } private: @@ -103,7 +105,7 @@ private: /// whether it has been used in the current statepoint. Since we try to /// preserve stack slots across safepoints, there can be gaps in which /// slots have been allocated. - SmallVector<bool, 50> AllocatedStackSlots; + SmallBitVector AllocatedStackSlots; /// Points just beyond the last slot known to have been allocated unsigned NextSlotToAllocate; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c64d882..806646f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -14,10 +14,11 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -25,7 +26,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -43,6 +43,10 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } +bool TargetLowering::isPositionIndependent() const { + return getTargetMachine().isPositionIndependent(); +} + /// Check whether a given call node is in tail position within its function. If /// so, it sets Chain to the input chain of the tail call. 
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, @@ -65,6 +69,31 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return isUsedByReturnOnly(Node, Chain); } +bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, + const uint32_t *CallerPreservedMask, + const SmallVectorImpl<CCValAssign> &ArgLocs, + const SmallVectorImpl<SDValue> &OutVals) const { + for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { + const CCValAssign &ArgLoc = ArgLocs[I]; + if (!ArgLoc.isRegLoc()) + continue; + unsigned Reg = ArgLoc.getLocReg(); + // Only look at callee saved registers. + if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) + continue; + // Check that we pass the value used for the caller. + // (We look for a CopyFromReg reading a virtual register that is used + // for the function live-in value of register Reg) + SDValue Value = OutVals[I]; + if (Value->getOpcode() != ISD::CopyFromReg) + return false; + unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); + if (MRI.getLiveInPhysReg(ArgReg) != Reg) + return false; + } + return true; +} + /// \brief Set CallLoweringInfo attribute flags based on a call instruction /// and called function attributes. void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, @@ -77,17 +106,17 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + isSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf); + isSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError); Alignment = CS->getParamAlignment(AttrIdx); } /// Generate a libcall taking the given operands as arguments and returning a /// result of type RetVT. std::pair<SDValue, SDValue> -TargetLowering::makeLibCall(SelectionDAG &DAG, - RTLIB::Libcall LC, EVT RetVT, - ArrayRef<SDValue> Ops, - bool isSigned, SDLoc dl, - bool doesNotReturn, +TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, + ArrayRef<SDValue> Ops, bool isSigned, + const SDLoc &dl, bool doesNotReturn, bool isReturnValueUsed) const { TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); @@ -110,7 +139,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, TargetLowering::CallLoweringInfo CLI(DAG); bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) .setSExtResult(signExtend).setZExtResult(!signExtend); return LowerCallTo(CLI); @@ -121,8 +150,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - SDLoc dl) const { - assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) + const SDLoc &dl) const { + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); // Expand into one or more soft-fp libcall(s). @@ -132,53 +161,65 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, case ISD::SETEQ: case ISD::SETOEQ: LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : - (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + (VT == MVT::f64) ? 
RTLIB::OEQ_F64 : + (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128; break; case ISD::SETNE: case ISD::SETUNE: LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : - (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128; + (VT == MVT::f64) ? RTLIB::UNE_F64 : + (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128; break; case ISD::SETGE: case ISD::SETOGE: LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : - (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + (VT == MVT::f64) ? RTLIB::OGE_F64 : + (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128; break; case ISD::SETLT: case ISD::SETOLT: LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + (VT == MVT::f64) ? RTLIB::OLT_F64 : + (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; break; case ISD::SETLE: case ISD::SETOLE: LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : - (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + (VT == MVT::f64) ? RTLIB::OLE_F64 : + (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128; break; case ISD::SETGT: case ISD::SETOGT: LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + (VT == MVT::f64) ? RTLIB::OGT_F64 : + (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; break; case ISD::SETUO: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : - (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + (VT == MVT::f64) ? RTLIB::UO_F64 : + (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; break; case ISD::SETO: LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : - (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; + (VT == MVT::f64) ? RTLIB::O_F64 : + (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128; break; case ISD::SETONE: // SETONE = SETOLT | SETOGT LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + (VT == MVT::f64) ? RTLIB::OLT_F64 : + (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + (VT == MVT::f64) ? RTLIB::OGT_F64 : + (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; break; case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : - (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + (VT == MVT::f64) ? RTLIB::UO_F64 : + (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : - (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + (VT == MVT::f64) ? RTLIB::OEQ_F64 : + (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128; break; default: // Invert CC for unordered comparisons @@ -186,19 +227,23 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, switch (CCCode) { case ISD::SETULT: LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : - (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + (VT == MVT::f64) ? RTLIB::OGE_F64 : + (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128; break; case ISD::SETULE: LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + (VT == MVT::f64) ? RTLIB::OGT_F64 : + (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; break; case ISD::SETUGT: LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : - (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + (VT == MVT::f64) ? RTLIB::OLE_F64 : + (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128; break; case ISD::SETUGE: LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + (VT == MVT::f64) ? RTLIB::OLT_F64 : + (VT == MVT::f128) ?
RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; break; default: llvm_unreachable("Do not know how to soften this setcc!"); } @@ -235,7 +280,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. unsigned TargetLowering::getJumpTableEncoding() const { // In non-pic modes, just use the address of a block. - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + if (!isPositionIndependent()) return MachineJumpTableInfo::EK_BlockAddress; // In PIC mode, if the target supports a GPRel32 directive, use it. @@ -269,17 +314,20 @@ TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, bool TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { - // Assume that everything is safe in static mode. - if (getTargetMachine().getRelocationModel() == Reloc::Static) - return true; + const TargetMachine &TM = getTargetMachine(); + const GlobalValue *GV = GA->getGlobal(); - // In dynamic-no-pic mode, assume that known defined values are safe. - if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC && - GA && GA->getGlobal()->isStrongDefinitionForLinker()) - return true; + // If the address is not even local to this DSO we will have to load it from + // a got and then add the offset. + if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + return false; - // Otherwise assume nothing is safe. - return false; + // If the code is position independent we will have to add a base register. + if (isPositionIndependent()) + return false; + + // Otherwise we can do it. + return true; } //===----------------------------------------------------------------------===// @@ -326,11 +374,10 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be /// generalized for targets with other types of implicit widening casts. -bool -TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, - unsigned BitWidth, - const APInt &Demanded, - SDLoc dl) { +bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, + unsigned BitWidth, + const APInt &Demanded, + const SDLoc &dl) { assert(Op.getNumOperands() == 2 && "ShrinkDemandedOp only supports binary operators!"); assert(Op.getNode()->getNumValues() == 1 && @@ -407,7 +454,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, NewMask = APInt::getAllOnesValue(BitWidth); } else if (DemandedMask == 0) { // Not demanding any bits from Op. - if (Op.getOpcode() != ISD::UNDEF) + if (!Op.isUndef()) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); return false; } else if (Depth == 6) { // Limit search depth. @@ -1157,37 +1204,6 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } -/// Test if the given value is known to have exactly one bit set. This differs -/// from computeKnownBits in that it doesn't need to determine which bit is set. -static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { - // A left-shift of a constant one will have exactly one bit set, because - // shifting the bit off the end is undefined. - if (Val.getOpcode() == ISD::SHL) - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) - if (C->getAPIntValue() == 1) - return true; - - // Similarly, a right-shift of a constant sign-bit will have exactly - // one bit set. 
- if (Val.getOpcode() == ISD::SRL) - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) - if (C->getAPIntValue().isSignBit()) - return true; - - // More could be done here, though the above checks are enough - // to handle some common cases. - - // Fall back to computeKnownBits to catch other known cases. - EVT OpVT = Val.getValueType(); - unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); - APInt KnownZero, KnownOne; - DAG.computeKnownBits(Val, KnownZero, KnownOne); - return (KnownZero.countPopulation() == BitWidth - 1) && - (KnownOne.countPopulation() == 1); -} - bool TargetLowering::isConstTrueVal(const SDNode *N) const { if (!N) return false; @@ -1218,6 +1234,16 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { llvm_unreachable("Invalid boolean contents"); } +SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT, + const SDLoc &DL) const { + unsigned ElementWidth = VT.getScalarSizeInBits(); + APInt TrueInt = + getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent + ? APInt(ElementWidth, 1) + : APInt::getAllOnesValue(ElementWidth); + return DAG.getConstant(TrueInt, DL, VT); +} + bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (!N) return false; @@ -1242,12 +1268,91 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { return CN->isNullValue(); } +bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, + bool SExt) const { + if (VT == MVT::i1) + return N->isOne(); + + TargetLowering::BooleanContent Cnt = getBooleanContents(VT); + switch (Cnt) { + case TargetLowering::ZeroOrOneBooleanContent: + // An extended value of 1 is always true, unless its original type is i1, + // in which case it will be sign extended to -1. + return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1)); + case TargetLowering::UndefinedBooleanContent: + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return N->isAllOnesValue() && SExt; + } + llvm_unreachable("Unexpected enumeration."); +} + +/// This helper function of SimplifySetCC tries to optimize the comparison when +/// either operand of the SetCC node is a bitwise-and instruction. +SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + // Match these patterns in any of their permutations: + // (X & Y) == Y + // (X & Y) != Y + if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND) + std::swap(N0, N1); + + EVT OpVT = N0.getValueType(); + if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() || + (Cond != ISD::SETEQ && Cond != ISD::SETNE)) + return SDValue(); + + SDValue X, Y; + if (N0.getOperand(0) == N1) { + X = N0.getOperand(1); + Y = N0.getOperand(0); + } else if (N0.getOperand(1) == N1) { + X = N0.getOperand(0); + Y = N0.getOperand(1); + } else { + return SDValue(); + } + + SelectionDAG &DAG = DCI.DAG; + SDValue Zero = DAG.getConstant(0, DL, OpVT); + if (DAG.isKnownToBeAPowerOfTwo(Y)) { + // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. + // Note that where Y is variable and is known to have at most one bit set + // (for example, if it is Z & 1) we cannot do this; the expressions are not + // equivalent when Y == 0. 
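// Editor's aside: a standalone check of the identity this transform relies
// on (plain C++, not DAG code). When Y has exactly one bit set, the forms
// (X & Y) == Y and (X & Y) != 0 agree for every X; when Y can be zero they
// do not, which is exactly the caveat in the comment above.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Bit = 0; Bit < 8; ++Bit) {
    uint8_t Y = uint8_t(1u << Bit); // a power of two: exactly one bit set
    for (unsigned X = 0; X < 256; ++X)
      assert(((uint8_t(X) & Y) == Y) == ((uint8_t(X) & Y) != 0));
  }
  // Counterexample for Y == 0: the == form is true, the != form is false.
  assert(((5 & 0) == 0) && !((5 & 0) != 0));
  return 0;
}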
+ Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N0.getSimpleValueType())) + return DAG.getSetCC(DL, VT, N0, Zero, Cond); + } else if (N0.hasOneUse() && hasAndNotCompare(Y)) { + // If the target supports an 'and-not' or 'and-complement' logic operation, + // try to use that to make a comparison operation more efficient. + // But don't do this transform if the mask is a single bit because there are + // more efficient ways to deal with that case (for example, 'bt' on x86 or + // 'rlwinm' on PPC). + + // Bail out if the compare operand that we want to turn into a zero is + // already a zero (otherwise, infinite loop). + auto *YConst = dyn_cast<ConstantSDNode>(Y); + if (YConst && YConst->isNullValue()) + return SDValue(); + + // Transform this into: ~X & Y == 0. + SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT); + SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y); + return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond); + } + + return SDValue(); +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. -SDValue -TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, - ISD::CondCode Cond, bool foldBooleans, - DAGCombinerInfo &DCI, SDLoc dl) const { +SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, bool foldBooleans, + DAGCombinerInfo &DCI, + const SDLoc &dl) const { SelectionDAG &DAG = DCI.DAG; // These setcc operations always fold. @@ -1376,6 +1481,38 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } + + // If truncating the setcc operands is not desirable, we can still + // simplify the expression in some cases: + // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc) + // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc)) + // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc)) + // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc) + // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc)) + // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc) + SDValue TopSetCC = N0->getOperand(0); + unsigned N0Opc = N0->getOpcode(); + bool SExt = (N0Opc == ISD::SIGN_EXTEND); + if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 && + TopSetCC.getOpcode() == ISD::SETCC && + (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) && + (isConstFalseVal(N1C) || + isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) { + + bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) || + (!N1C->isNullValue() && Cond == ISD::SETNE); + + if (!Inverse) + return TopSetCC; + + ISD::CondCode InvCond = ISD::getSetCCInverse( + cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(), + TopSetCC.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), + TopSetCC.getOperand(1), + InvCond); + + } } } @@ -1426,9 +1563,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), DAG.getConstant(bestOffset, dl, PtrType)); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); - SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, - Lod->getPointerInfo().getWithOffset(bestOffset), - false, false, false, NewAlign); + SDValue NewLoad = DAG.getLoad( + newVT, dl, Lod->getChain(), Ptr, + 
Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -1994,32 +2131,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } - // Simplify x&y == y to x&y != 0 if y has exactly one bit set. - // Note that where y is variable and is known to have at most - // one bit set (for example, if it is z&1) we cannot do this; - // the expressions are not equivalent when y==0. - if (N0.getOpcode() == ISD::AND) - if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { - if (ValueHasExactlyOneBitSet(N1, DAG)) { - Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - if (DCI.isBeforeLegalizeOps() || - isCondCodeLegal(Cond, N0.getSimpleValueType())) { - SDValue Zero = DAG.getConstant(0, dl, N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, Zero, Cond); - } - } - } - if (N1.getOpcode() == ISD::AND) - if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { - if (ValueHasExactlyOneBitSet(N0, DAG)) { - Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); - if (DCI.isBeforeLegalizeOps() || - isCondCodeLegal(Cond, N1.getSimpleValueType())) { - SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); - return DAG.getSetCC(dl, VT, N1, Zero, Cond); - } - } - } + if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl)) + return V; } // Fold away ALL boolean setcc's. @@ -2202,8 +2315,10 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, C = dyn_cast<ConstantSDNode>(Op.getOperand(0)); GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1)); } - if (!C || !GA) - C = nullptr, GA = nullptr; + if (!C || !GA) { + C = nullptr; + GA = nullptr; + } } // If we find a valid operand, map to the TargetXXX version so that the @@ -2260,7 +2375,7 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { - if (RegName.equals_lower(RI->getName(*I))) { + if (RegName.equals_lower(RI->getRegAsmName(*I))) { std::pair<unsigned, const TargetRegisterClass*> S = std::make_pair(*I, RC); @@ -2680,7 +2795,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, /// \brief Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. 
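// Editor's aside: a hedged sketch of the arithmetic behind this helper, as
// standalone C++ for the unsigned, odd-divisor case only (the real code
// below works on APInt and, for even divisors, first shifts out the
// trailing zero bits).
#include <cassert>
#include <cstdint>

// Inverse of an odd d modulo 2^32 by Newton's iteration: x = d is already
// correct to 3 bits since d*d == 1 (mod 8) for odd d, and every step
// doubles the number of correct low bits.
static uint32_t inverseMod2_32(uint32_t d) {
  assert((d & 1) && "only odd values are invertible mod 2^32");
  uint32_t x = d;
  for (int i = 0; i < 5; ++i)
    x *= 2 - d * x;
  return x;
}

int main() {
  uint32_t d = 7, inv = inverseMod2_32(d);
  assert(d * inv == 1u); // arithmetic wraps mod 2^32
  // For an exact division (no remainder), a multiply replaces the divide:
  for (uint32_t q = 0; q < 1000; ++q)
    assert((q * d) * inv == q);
  return 0;
}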
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, - SDLoc dl, SelectionDAG &DAG, + const SDLoc &dl, SelectionDAG &DAG, std::vector<SDNode *> &Created) { assert(d != 0 && "Division by zero!"); @@ -3039,6 +3154,370 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, return true; } +SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, + SelectionDAG &DAG) const { + SDLoc SL(LD); + SDValue Chain = LD->getChain(); + SDValue BasePTR = LD->getBasePtr(); + EVT SrcVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + unsigned NumElem = SrcVT.getVectorNumElements(); + + EVT SrcEltVT = SrcVT.getScalarType(); + EVT DstEltVT = LD->getValueType(0).getScalarType(); + + unsigned Stride = SrcEltVT.getSizeInBits() / 8; + assert(SrcEltVT.isByteSized()); + + EVT PtrVT = BasePTR.getValueType(); + + SmallVector<SDValue, 8> Vals; + SmallVector<SDValue, 8> LoadChains; + + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + SDValue ScalarLoad = + DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + + BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR, + DAG.getConstant(Stride, SL, PtrVT)); + + Vals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals); + + return DAG.getMergeValues({ Value, NewChain }, SL); +} + +// FIXME: This relies on each element having a byte size, otherwise the stride +// is 0 and just overwrites the same location. ExpandStore currently expects +// this broken behavior. +SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, + SelectionDAG &DAG) const { + SDLoc SL(ST); + + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + + // The type of the data we want to save + EVT RegVT = Value.getValueType(); + EVT RegSclVT = RegVT.getScalarType(); + + // The type of data as saved in memory. + EVT MemSclVT = StVT.getScalarType(); + + EVT PtrVT = BasePtr.getValueType(); + + // Store Stride in bytes + unsigned Stride = MemSclVT.getSizeInBits() / 8; + EVT IdxVT = getVectorIdxTy(DAG.getDataLayout()); + unsigned NumElem = StVT.getVectorNumElements(); + + // Extract each of the elements from the original vector and save them into + // memory individually. + SmallVector<SDValue, 8> Stores; + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, + DAG.getConstant(Idx, SL, IdxVT)); + + SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, + DAG.getConstant(Idx * Stride, SL, PtrVT)); + + // This scalar TruncStore may be illegal, but we legalize it later. 
+ SDValue Store = DAG.getTruncStore( + Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride), + MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride), + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + + Stores.push_back(Store); + } + + return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores); +} + +std::pair<SDValue, SDValue> +TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { + assert(LD->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed loads not implemented!"); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + EVT VT = LD->getValueType(0); + EVT LoadedVT = LD->getMemoryVT(); + SDLoc dl(LD); + if (VT.isFloatingPoint() || VT.isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); + if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) { + if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) { + // Scalarize the load and let the individual components be handled. + SDValue Scalarized = scalarizeVectorLoad(LD, DAG); + return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); + } + + // Expand to a (misaligned) integer load of the same size, + // then bitconvert to floating point or vector. + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, + LD->getMemOperand()); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); + + return std::make_pair(Result, newLoad.getValue(1)); + } + + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. + MVT RegVT = getRegisterType(*DAG.getContext(), intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SmallVector<SDValue, 8> Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + EVT PtrVT = Ptr.getValueType(); + EVT StackPtrVT = StackPtr.getValueType(); + + SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT); + SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT); + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad( + RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), + MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), + LD->getAAInfo()); + // Follow the load with a store to the stack slot. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo())); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr, + StackPtrIncrement); + } + + // The last copy may be partial. Do an extending load. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); + SDValue Load = + DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), MemVT, + MinAlign(LD->getAlignment(), Offset), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + // Follow the load with a store to the stack slot. Remember the store. 
+ // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), MemVT)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + MachinePointerInfo(), LoadedVT); + + // Callers expect a MERGE_VALUES node. + return std::make_pair(Load, TF); + } + + assert(LoadedVT.isInteger() && !LoadedVT.isVector() && + "Unaligned load of unsupported type."); + + // Compute the new VT that is half the size of the old one. This is an + // integer MVT. + unsigned NumBits = LoadedVT.getSizeInBits(); + EVT NewLoadedVT; + NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); + NumBits >>= 1; + + unsigned Alignment = LD->getAlignment(); + unsigned IncrementSize = NumBits / 8; + ISD::LoadExtType HiExtType = LD->getExtensionType(); + + // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. + if (HiExtType == ISD::NON_EXTLOAD) + HiExtType = ISD::ZEXTLOAD; + + // Load the value in two parts + SDValue Lo, Hi; + if (DAG.getDataLayout().isLittleEndian()) { + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, MinAlign(Alignment, IncrementSize), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + } else { + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, MinAlign(Alignment, IncrementSize), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + } + + // aggregate the two parts + SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(), + DAG.getDataLayout())); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); + Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + return std::make_pair(Result, TF); +} + +SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, + SelectionDAG &DAG) const { + assert(ST->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed stores not implemented!"); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDValue Val = ST->getValue(); + EVT VT = Val.getValueType(); + int Alignment = ST->getAlignment(); + + SDLoc dl(ST); + if (ST->getMemoryVT().isFloatingPoint() || + ST->getMemoryVT().isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (isTypeLegal(intVT)) { + if (!isOperationLegalOrCustom(ISD::STORE, intVT)) { + // Scalarize the store and let the individual components be handled. 
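// Editor's aside: what scalarizing a vector store means, as a tiny
// standalone sketch (plain C++, not DAG code). Element Idx is stored on
// its own at BasePtr + Idx * Stride, where Stride is the size of the
// in-memory scalar type -- the loop in scalarizeVectorStore above does the
// same with one EXTRACT_VECTOR_ELT plus one truncating store per element.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint16_t Val[4] = {1, 2, 3, 4}; // stands in for a v4i16 value
  uint8_t Mem[8] = {0};
  const unsigned Stride = sizeof(uint16_t); // element size in bytes
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    std::memcpy(Mem + Idx * Stride, &Val[Idx], Stride); // one scalar store
  assert(std::memcmp(Mem, Val, sizeof Val) == 0);
  return 0;
}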
+ SDValue Result = scalarizeVectorStore(ST, DAG); + + return Result; + } + // Expand to a bitconvert of the value to the integer type of the + // same size, then a (misaligned) int store. + // FIXME: Does not handle truncating floating point stores! + SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + Alignment, ST->getMemOperand()->getFlags()); + return Result; + } + // Do a (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. + EVT StoredVT = ST->getMemoryVT(); + MVT RegVT = + getRegisterType(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), + StoredVT.getSizeInBits())); + EVT PtrVT = Ptr.getValueType(); + unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + + // Perform the original store, only redirected to the stack slot. + SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, + MachinePointerInfo(), StoredVT); + + EVT StackPtrVT = StackPtr.getValueType(); + + SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT); + SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT); + SmallVector<SDValue, 8> Stores; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the stack slot. + SDValue Load = + DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo()); + // Store it to the final location. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo().getWithOffset(Offset), + MinAlign(ST->getAlignment(), Offset), + ST->getMemOperand()->getFlags())); + // Increment the pointers. + Offset += RegBytes; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, + StackPtr, StackPtrIncrement); + Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); + } + + // The last store may be partial. Do a truncating store. On big-endian + // machines this requires an extending load from the stack slot to ensure + // that the bits are in the right place. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (StoredBytes - Offset)); + + // Load from the stack slot. + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo(), MemVT); + + Stores.push_back( + DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo().getWithOffset(Offset), MemVT, + MinAlign(ST->getAlignment(), Offset), + ST->getMemOperand()->getFlags(), ST->getAAInfo())); + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + return Result; + } + + assert(ST->getMemoryVT().isInteger() && + !ST->getMemoryVT().isVector() && + "Unaligned store of unknown type."); + // Get the half-size VT + EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext()); + int NumBits = NewStoredVT.getSizeInBits(); + int IncrementSize = NumBits / 8; + + // Divide the stored value in two parts. 
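// Editor's aside: the Lo/Hi split performed below, shown as standalone C++
// under the assumption of a little-endian host (the DAG code consults
// DataLayout to decide which half goes to the lower address).
#include <cassert>
#include <cstdint>
#include <cstring>

static void store32AsTwoHalves(uint8_t *Ptr, uint32_t Val) {
  uint16_t Lo = uint16_t(Val);       // Lo = Val
  uint16_t Hi = uint16_t(Val >> 16); // Hi = Val >> NumBits
  std::memcpy(Ptr, &Lo, sizeof Lo);             // first half at Ptr
  std::memcpy(Ptr + sizeof Lo, &Hi, sizeof Hi); // second at Ptr + IncrementSize
}

int main() {
  uint8_t Buf[8] = {0};
  store32AsTwoHalves(Buf + 1, 0xAABBCCDDu); // deliberately misaligned target
  uint32_t Back;
  std::memcpy(&Back, Buf + 1, sizeof Back);
  assert(Back == 0xAABBCCDDu); // holds on a little-endian host
  return 0;
}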
+ SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(), + DAG.getDataLayout())); + SDValue Lo = Val; + SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); + + // Store the two parts + SDValue Store1, Store2; + Store1 = DAG.getTruncStore(Chain, dl, + DAG.getDataLayout().isLittleEndian() ? Lo : Hi, + Ptr, ST->getPointerInfo(), NewStoredVT, Alignment, + ST->getMemOperand()->getFlags()); + + EVT PtrVT = Ptr.getValueType(); + Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, + DAG.getConstant(IncrementSize, dl, PtrVT)); + Alignment = MinAlign(Alignment, IncrementSize); + Store2 = DAG.getTruncStore( + Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment, + ST->getMemOperand()->getFlags(), ST->getAAInfo()); + + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + return Result; +} + //===----------------------------------------------------------------------===// // Implementation of Emulated TLS Model //===----------------------------------------------------------------------===// @@ -3057,9 +3536,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent()); StringRef EmuTlsVarName(NameString); GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName); - if (!EmuTlsVar) - EmuTlsVar = dyn_cast_or_null<GlobalVariable>( - VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType)); + assert(EmuTlsVar && "Cannot find EmuTlsVar "); Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT); Entry.Ty = VoidPtrType; Args.push_back(Entry); @@ -3068,7 +3545,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()); - CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0); + CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args)); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp deleted file mode 100644 index b12e943..0000000 --- a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements lowering for the llvm.gc* intrinsics for targets that do -// not natively support them (which includes the C backend). Note that the code -// generated is not quite as efficient as algorithms which generate stack maps -// to identify roots. -// -// This pass implements the code transformation described in this paper: -// "Accurate Garbage Collection in an Uncooperative Environment" -// Fergus Henderson, ISMM, 2002 -// -// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with -// ShadowStackGC. -// -// In order to support this particular transformation, all stack roots are -// coallocated in the stack. This allows a fully target-independent stack map -// while introducing only minor runtime overhead. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/GCs.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/CodeGen/GCStrategy.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" - -using namespace llvm; - -#define DEBUG_TYPE "shadowstackgc" - -namespace { -class ShadowStackGC : public GCStrategy { -public: - ShadowStackGC(); -}; -} - -static GCRegistry::Add<ShadowStackGC> - X("shadow-stack", "Very portable GC for uncooperative code generators"); - -void llvm::linkShadowStackGC() {} - -ShadowStackGC::ShadowStackGC() { - InitRoots = true; - CustomRoots = true; -} diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 878eeee..1efc440 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -8,7 +8,11 @@ //===----------------------------------------------------------------------===// // // This file contains the custom lowering code required by the shadow-stack GC -// strategy. +// strategy. +// +// This pass implements the code transformation described in this paper: +// "Accurate Garbage Collection in an Uncooperative Environment" +// Fergus Henderson, ISMM, 2002 // //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index e1f242a..ce01c5f 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -24,18 +23,11 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include <set> using namespace llvm; #define DEBUG_TYPE "sjljehprepare" @@ -55,7 +47,6 @@ class SjLjEHPrepare : public FunctionPass { Constant *StackAddrFn; Constant *StackRestoreFn; Constant *LSDAAddrFn; - Value *PersonalityFn; Constant *CallSiteFn; Constant *FuncCtxFn; AllocaInst *FuncCtx; @@ -103,21 +94,6 @@ bool SjLjEHPrepare::doInitialization(Module &M) { VoidPtrTy, // __lsda doubleUnderJBufTy, // __jbuf nullptr); - RegisterFn = M.getOrInsertFunction( - "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), (Type *)nullptr); - UnregisterFn = M.getOrInsertFunction( - "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), (Type *)nullptr); - FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); - StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); - StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); - BuiltinSetupDispatchFn = - Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch); - LSDAAddrFn = 
Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); - CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); - FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); - PersonalityFn = nullptr; return true; } @@ -141,15 +117,15 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/); } -/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until +/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until /// we reach blocks we've already seen. static void MarkBlocksLiveIn(BasicBlock *BB, SmallPtrSetImpl<BasicBlock *> &LiveBBs) { if (!LiveBBs.insert(BB).second) return; // already been here. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - MarkBlocksLiveIn(*PI, LiveBBs); + for (BasicBlock *PredBB : predecessors(BB)) + MarkBlocksLiveIn(PredBB, LiveBBs); } /// substituteLPadValues - Substitute the values returned by the landingpad @@ -159,7 +135,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, SmallVector<Value *, 8> UseWorkList(LPI->user_begin(), LPI->user_end()); while (!UseWorkList.empty()) { Value *Val = UseWorkList.pop_back_val(); - ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val); + auto *EVI = dyn_cast<ExtractValueInst>(Val); if (!EVI) continue; if (EVI->getNumIndices() != 1) @@ -168,11 +144,11 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, EVI->replaceAllUsesWith(ExnVal); else if (*EVI->idx_begin() == 1) EVI->replaceAllUsesWith(SelVal); - if (EVI->getNumUses() == 0) + if (EVI->use_empty()) EVI->eraseFromParent(); } - if (LPI->getNumUses() == 0) + if (LPI->use_empty()) return; // There are still some uses of LPI. Construct an aggregate with the exception @@ -202,8 +178,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, &EntryBB->front()); // Fill in the function context structure. - for (unsigned I = 0, E = LPads.size(); I != E; ++I) { - LandingPadInst *LPI = LPads[I]; + for (LandingPadInst *LPI : LPads) { IRBuilder<> Builder(LPI->getParent(), LPI->getParent()->getFirstInsertionPt()); @@ -226,8 +201,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // Personality function IRBuilder<> Builder(EntryBB->getTerminator()); - if (!PersonalityFn) - PersonalityFn = F.getPersonalityFn(); + Value *PersonalityFn = F.getPersonalityFn(); Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32( FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep"); Builder.CreateStore( @@ -250,7 +224,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, void SjLjEHPrepare::lowerIncomingArguments(Function &F) { BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsPt) && - isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) + cast<AllocaInst>(AfterAllocaInsPt)->isStaticAlloca()) ++AfterAllocaInsPt; assert(AfterAllocaInsPt != F.front().end()); @@ -274,40 +248,37 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { - for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE; - ++II) { + for (BasicBlock &BB : F) { + for (Instruction &Inst : BB) { // Ignore obvious cases we don't have to handle. 
In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. - Instruction *Inst = &*II; - if (Inst->use_empty()) + if (Inst.use_empty()) continue; - if (Inst->hasOneUse() && - cast<Instruction>(Inst->user_back())->getParent() == BB && - !isa<PHINode>(Inst->user_back())) + if (Inst.hasOneUse() && + cast<Instruction>(Inst.user_back())->getParent() == &BB && + !isa<PHINode>(Inst.user_back())) continue; // If this is an alloca in the entry block, it's not a real register // value. - if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) - if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin()) + if (auto *AI = dyn_cast<AllocaInst>(&Inst)) + if (AI->isStaticAlloca()) continue; // Avoid iterator invalidation by copying users to a temporary vector. SmallVector<Instruction *, 16> Users; - for (User *U : Inst->users()) { + for (User *U : Inst.users()) { Instruction *UI = cast<Instruction>(U); - if (UI->getParent() != BB || isa<PHINode>(UI)) + if (UI->getParent() != &BB || isa<PHINode>(UI)) Users.push_back(UI); } // Find all of the blocks that this value is live in. - SmallPtrSet<BasicBlock *, 64> LiveBBs; - LiveBBs.insert(Inst->getParent()); + SmallPtrSet<BasicBlock *, 32> LiveBBs; + LiveBBs.insert(&BB); while (!Users.empty()) { - Instruction *U = Users.back(); - Users.pop_back(); + Instruction *U = Users.pop_back_val(); if (!isa<PHINode>(U)) { MarkBlocksLiveIn(U->getParent(), LiveBBs); @@ -315,7 +286,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, // Uses for a PHI node occur in their predecessor block. PHINode *PN = cast<PHINode>(U); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Inst) + if (PN->getIncomingValue(i) == &Inst) MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); } } @@ -323,10 +294,10 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, // Now that we know all of the blocks that this thing is live in, see if // it includes any of the unwind locations. bool NeedsSpill = false; - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); - if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { - DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " + for (InvokeInst *Invoke : Invokes) { + BasicBlock *UnwindBlock = Invoke->getUnwindDest(); + if (UnwindBlock != &BB && LiveBBs.count(UnwindBlock)) { + DEBUG(dbgs() << "SJLJ Spill: " << Inst << " around " << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; @@ -338,15 +309,15 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { - DemoteRegToStack(*Inst, true); + DemoteRegToStack(Inst, true); ++NumSpilled; } } } // Go through the landing pads and remove any PHIs there. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + for (InvokeInst *Invoke : Invokes) { + BasicBlock *UnwindBlock = Invoke->getUnwindDest(); LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); // Place PHIs into a set to avoid invalidating the iterator. @@ -374,11 +345,10 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { SmallSetVector<LandingPadInst *, 16> LPads; // Look through the terminators of the basic blocks to find invokes. 
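The spill decision above reduces to a reachability question: a value must be demoted to a stack slot if the set of blocks it is live in, obtained by flood-filling predecessors from every use, contains some invoke's unwind destination. Below is a minimal stand-alone sketch of that test under a simplified CFG model; Block, markBlocksLiveIn, and needsSpill are hypothetical stand-ins for the LLVM types and helpers used above, not the pass's actual API.

#include <unordered_set>
#include <vector>

// Hypothetical CFG node; LLVM's BasicBlock plays this role in the pass.
struct Block {
  std::vector<Block *> preds;
};

// Walk backwards from BB, inserting every block reached until we hit a
// block we have already seen (cf. MarkBlocksLiveIn above).
static void markBlocksLiveIn(Block *BB, std::unordered_set<Block *> &LiveBBs) {
  if (!LiveBBs.insert(BB).second)
    return; // already been here
  for (Block *Pred : BB->preds)
    markBlocksLiveIn(Pred, LiveBBs);
}

// A value defined in DefBB and used in UseBBs needs a spill when some
// unwind destination lies inside the blocks the value is live in.
static bool needsSpill(Block *DefBB, const std::vector<Block *> &UseBBs,
                       const std::vector<Block *> &UnwindDests) {
  std::unordered_set<Block *> LiveBBs;
  LiveBBs.insert(DefBB);
  for (Block *UseBB : UseBBs)
    markBlocksLiveIn(UseBB, LiveBBs);
  for (Block *Unwind : UnwindDests)
    if (Unwind != DefBB && LiveBBs.count(Unwind))
      return true;
  return false;
}

When the predicate holds, the pass calls DemoteRegToStack, which rewrites the value into loads and stores of a dedicated slot so nothing live is carried in registers across the unwind edge.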
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + for (BasicBlock &BB : F) + if (auto *II = dyn_cast<InvokeInst>(BB.getTerminator())) { if (Function *Callee = II->getCalledFunction()) - if (Callee->isIntrinsic() && - Callee->getIntrinsicID() == Intrinsic::donothing) { + if (Callee->getIntrinsicID() == Intrinsic::donothing) { // Remove the NOP invoke. BranchInst::Create(II->getNormalDest(), II); II->eraseFromParent(); @@ -387,7 +357,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Invokes.push_back(II); LPads.insert(II->getUnwindDest()->getLandingPadInst()); - } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + } else if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) { Returns.push_back(RI); } @@ -448,14 +418,13 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { // created for this function and any unexpected exceptions thrown will go // directly to the caller's context, which is what we want anyway, so no need // to do anything here. - for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) - for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (!CI->doesNotThrow()) - insertCallSiteStore(CI, -1); - } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { - insertCallSiteStore(RI, -1); - } + for (BasicBlock &BB : F) { + if (&BB == &F.front()) + continue; + for (Instruction &I : BB) + if (I.mayThrow()) + insertCallSiteStore(&I, -1); + } // Register the function context and make sure it's known to not throw CallInst *Register = @@ -464,18 +433,18 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { // Following any allocas not in the entry block, update the saved SP in the // jmpbuf to the new value. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (BB == F.begin()) + for (BasicBlock &BB : F) { + if (&BB == &F.front()) continue; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast<CallInst>(I)) { + for (Instruction &I : BB) { + if (auto *CI = dyn_cast<CallInst>(&I)) { if (CI->getCalledFunction() != StackRestoreFn) continue; - } else if (!isa<AllocaInst>(I)) { + } else if (!isa<AllocaInst>(&I)) { continue; } Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); - StackAddr->insertAfter(&*I); + StackAddr->insertAfter(&I); Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); StoreStackAddr->insertAfter(StackAddr); } @@ -483,13 +452,29 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. 
- for (unsigned I = 0, E = Returns.size(); I != E; ++I)
- CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]);
+ for (ReturnInst *Return : Returns)
+ CallInst::Create(UnregisterFn, FuncCtx, "", Return);
return true;
}
bool SjLjEHPrepare::runOnFunction(Function &F) {
+ Module &M = *F.getParent();
+ RegisterFn = M.getOrInsertFunction(
+ "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy), nullptr);
+ UnregisterFn = M.getOrInsertFunction(
+ "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
+ PointerType::getUnqual(FunctionContextTy), nullptr);
+ FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+ BuiltinSetupDispatchFn =
+ Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch);
+ LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+ CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
+
bool Res = setupEntryBlockAndCallSites(F);
return Res;
}
diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
index c9d23f6..dba103e9 100644
--- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -69,34 +69,29 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
indexList.push_back(createEntry(nullptr, index));
// Iterate over the function.
- for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
- mbbItr != mbbEnd; ++mbbItr) {
- MachineBasicBlock *mbb = &*mbbItr;
-
+ for (MachineBasicBlock &MBB : *mf) {
// Insert an index for the MBB start.
SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block);
- for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
- miItr != miEnd; ++miItr) {
- MachineInstr *mi = miItr;
- if (mi->isDebugValue())
+ for (MachineInstr &MI : MBB) {
+ if (MI.isDebugValue())
continue;
// Insert a store index for the instr.
- indexList.push_back(createEntry(mi, index += SlotIndex::InstrDist));
+ indexList.push_back(createEntry(&MI, index += SlotIndex::InstrDist));
// Save this base index in the maps.
- mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(),
- SlotIndex::Slot_Block)));
+ mi2iMap.insert(std::make_pair(
+ &MI, SlotIndex(&indexList.back(), SlotIndex::Slot_Block)));
}
// We insert one blank instruction between basic blocks.
indexList.push_back(createEntry(nullptr, index += SlotIndex::InstrDist));
- MBBRanges[mbb->getNumber()].first = blockStartIndex;
- MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(),
+ MBBRanges[MBB.getNumber()].first = blockStartIndex;
+ MBBRanges[MBB.getNumber()].second = SlotIndex(&indexList.back(),
SlotIndex::Slot_Block);
- idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
+ idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, &MBB));
}
// Sort the Idx2MBBMap
@@ -150,9 +145,9 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
// does the same thing.
// Find anchor points, which are at the beginning/end of blocks or at
// instructions that already have indexes. 
- while (Begin != MBB->begin() && !hasIndex(Begin)) + while (Begin != MBB->begin() && !hasIndex(*Begin)) --Begin; - while (End != MBB->end() && !hasIndex(End)) + while (End != MBB->end() && !hasIndex(*End)) ++End; bool includeStart = (Begin == MBB->begin()); @@ -160,13 +155,13 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, if (includeStart) startIdx = getMBBStartIdx(MBB); else - startIdx = getInstructionIndex(Begin); + startIdx = getInstructionIndex(*Begin); SlotIndex endIdx; if (End == MBB->end()) endIdx = getMBBEndIdx(MBB); else - endIdx = getInstructionIndex(End); + endIdx = getInstructionIndex(*End); // FIXME: Conceptually, this code is implementing an iterator on MBB that // optionally includes an additional position prior to MBB->begin(), indicated @@ -182,7 +177,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, "Decremented past the beginning of region to repair."); MachineInstr *SlotMI = ListI->getInstr(); - MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : nullptr; + MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? &*MBBI : nullptr; bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart); if (SlotMI == MI && !MBBIAtBegin) { @@ -199,7 +194,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, } else { --ListI; if (SlotMI) - removeMachineInstrFromMaps(SlotMI); + removeMachineInstrFromMaps(*SlotMI); } } @@ -207,14 +202,14 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, // to update the IndexList while we are iterating it. for (MachineBasicBlock::iterator I = End; I != Begin;) { --I; - MachineInstr *MI = I; - if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end()) + MachineInstr &MI = *I; + if (!MI.isDebugValue() && mi2iMap.find(&MI) == mi2iMap.end()) insertMachineInstrInMaps(MI); } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void SlotIndexes::dump() const { +LLVM_DUMP_METHOD void SlotIndexes::dump() const { for (IndexList::const_iterator itr = indexList.begin(); itr != indexList.end(); ++itr) { dbgs() << itr->getIndex() << " "; @@ -242,7 +237,7 @@ void SlotIndex::print(raw_ostream &os) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) // Dump a SlotIndex to stderr. -void SlotIndex::dump() const { +LLVM_DUMP_METHOD void SlotIndex::dump() const { print(dbgs()); dbgs() << "\n"; } diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp index d30cfc2..f10c98e 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -173,6 +173,17 @@ struct SpillPlacement::Node { Value = 0; return Before != preferReg(); } + + void getDissentingNeighbors(SparseSet<unsigned> &List, + const Node nodes[]) const { + for (const auto &Elt : Links) { + unsigned n = Elt.second; + // Neighbors that already have the same value are not going to + // change because of this node changing. + if (Value != nodes[n].Value) + List.insert(n); + } + } }; bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { @@ -182,6 +193,8 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { assert(!nodes && "Leaking node array"); nodes = new Node[bundles->getNumBundles()]; + TodoList.clear(); + TodoList.setUniverse(bundles->getNumBundles()); // Compute total ingoing and outgoing block frequencies for all bundles. 
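The getDissentingNeighbors hook added above carries the key invariant of the new worklist scheme: once a node settles on a value, only neighbors that currently hold a different value can still be perturbed by it, so only those need to be revisited. A small sketch of that seeding step, using std::set and a plain adjacency list in place of LLVM's SparseSet and Node array (the names here are illustrative):

#include <set>
#include <vector>

struct HopfieldNode {
  int Value = 0;                // -1 = spill, 0 = undecided, +1 = register
  std::vector<unsigned> Links;  // indices of neighboring nodes
};

// After node N changes its value, queue exactly the neighbors that
// disagree with it; agreeing neighbors cannot change because of N
// (cf. Node::getDissentingNeighbors above).
static void queueDissentingNeighbors(unsigned N,
                                     const std::vector<HopfieldNode> &Nodes,
                                     std::set<unsigned> &Todo) {
  for (unsigned M : Nodes[N].Links)
    if (Nodes[M].Value != Nodes[N].Value)
      Todo.insert(M);
}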
BlockFrequencies.resize(mf.getNumBlockIDs());
@@ -199,10 +212,12 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
void SpillPlacement::releaseMemory() {
delete[] nodes;
nodes = nullptr;
+ TodoList.clear();
}
/// activate - mark node n as active if it wasn't already.
void SpillPlacement::activate(unsigned n) {
+ TodoList.insert(n);
if (ActiveNodes->test(n))
return;
ActiveNodes->set(n);
@@ -287,10 +302,6 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
continue;
activate(ib);
activate(ob);
- if (nodes[ib].Links.empty() && !nodes[ib].mustSpill())
- Linked.push_back(ib);
- if (nodes[ob].Links.empty() && !nodes[ob].mustSpill())
- Linked.push_back(ob);
BlockFrequency Freq = BlockFrequencies[Number];
nodes[ib].addLink(ob, Freq);
nodes[ob].addLink(ib, Freq);
@@ -298,76 +309,50 @@ void SpillPlacement::addLinks(ArrayRef<unsigned> Links) {
}
bool SpillPlacement::scanActiveBundles() {
- Linked.clear();
RecentPositive.clear();
for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) {
- nodes[n].update(nodes, Threshold);
+ update(n);
// A node that must spill, or a node without any links is not going to
// change its value ever again, so exclude it from iterations.
if (nodes[n].mustSpill())
continue;
- if (!nodes[n].Links.empty())
- Linked.push_back(n);
if (nodes[n].preferReg())
RecentPositive.push_back(n);
}
return !RecentPositive.empty();
}
+bool SpillPlacement::update(unsigned n) {
+ if (!nodes[n].update(nodes, Threshold))
+ return false;
+ nodes[n].getDissentingNeighbors(TodoList, nodes);
+ return true;
+}
+
/// iterate - Repeatedly update the Hopfield nodes until stability or the
/// maximum number of iterations is reached.
-/// @param Linked - Numbers of linked nodes that need updating.
void SpillPlacement::iterate() {
- // First update the recently positive nodes. They have likely received new
- // negative bias that will turn them off.
- while (!RecentPositive.empty())
- nodes[RecentPositive.pop_back_val()].update(nodes, Threshold);
-
- if (Linked.empty())
- return;
+ // We do not need to push those nodes in the todolist.
+ // They have already been processed as part of the previous iteration.
RecentPositive.clear();
- // Run up to 10 iterations. The edge bundle numbering is closely related to
- // basic block numbering, so there is a strong tendency towards chains of
- // linked nodes with sequential numbers. By scanning the linked nodes
- // backwards and forwards, we make it very likely that a single node can
- // affect the entire network in a single iteration. That means very fast
- // convergence, usually in a single iteration.
- for (unsigned iteration = 0; iteration != 10; ++iteration) {
- // Scan backwards, skipping the last node when iteration is not zero. When
- // iteration is not zero, the last node was just updated.
- bool Changed = false;
- for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
- iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()),
- E = Linked.rend(); I != E; ++I) {
- unsigned n = *I;
- if (nodes[n].update(nodes, Threshold)) {
- Changed = true;
- if (nodes[n].preferReg())
- RecentPositive.push_back(n);
- }
- }
- if (!Changed || !RecentPositive.empty())
- return;
-
- // Scan forwards, skipping the first node which was just updated. 
- Changed = false;
- for (SmallVectorImpl<unsigned>::const_iterator I =
- std::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
- unsigned n = *I;
- if (nodes[n].update(nodes, Threshold)) {
- Changed = true;
- if (nodes[n].preferReg())
- RecentPositive.push_back(n);
- }
- }
- if (!Changed || !RecentPositive.empty())
- return;
+ // Since the last iteration, the todolist has been augmented by calls
+ // to addConstraints, addLinks, and co.
+ // Update the network energy starting at this new frontier.
+ // The call to ::update will add the nodes that changed into the todolist.
+ unsigned Limit = bundles->getNumBundles() * 10;
+ while (Limit-- > 0 && !TodoList.empty()) {
+ unsigned n = TodoList.pop_back_val();
+ if (!update(n))
+ continue;
+ if (nodes[n].preferReg())
+ RecentPositive.push_back(n);
+ }
}
void SpillPlacement::prepare(BitVector &RegBundles) {
- Linked.clear();
RecentPositive.clear();
+ TodoList.clear();
// Reuse RegBundles as our ActiveNodes vector.
ActiveNodes = &RegBundles;
ActiveNodes->clear();
diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h
index 03dd58d..9b9eccc 100644
--- a/contrib/llvm/lib/CodeGen/SpillPlacement.h
+++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h
@@ -29,6 +29,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/BlockFrequency.h"
@@ -66,6 +67,9 @@ class SpillPlacement : public MachineFunctionPass {
/// its inputs falls in the open interval (-Threshold;Threshold).
BlockFrequency Threshold;
+ /// List of nodes that need to be updated in ::iterate.
+ SparseSet<unsigned> TodoList;
+
public:
static char ID; // Pass identification, replacement for typeid.
@@ -157,6 +161,8 @@ private:
void activate(unsigned);
void setThreshold(const BlockFrequency &Entry);
+
+ bool update(unsigned);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h
index 08f99ec..61ee508 100644
--- a/contrib/llvm/lib/CodeGen/Spiller.h
+++ b/contrib/llvm/lib/CodeGen/Spiller.h
@@ -16,6 +16,7 @@
namespace llvm {
class MachineFunction;
class MachineFunctionPass;
class VirtRegMap;
+ class LiveIntervals;
/// Spiller interface.
///
@@ -28,7 +29,7 @@ namespace llvm {
/// spill - Spill the LRE.getParent() live interval. 
virtual void spill(LiveRangeEdit &LRE) = 0;
-
+ virtual void postOptimization() {}
};
/// Create and return a spiller that will insert spill code directly instead
@@ -36,7 +37,6 @@ namespace llvm {
Spiller *createInlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm);
-
}
#endif
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 51dddab..07be24b 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -37,82 +38,101 @@ STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
//===----------------------------------------------------------------------===//
-// Split Analysis
+// Last Insert Point Analysis
//===----------------------------------------------------------------------===//
-SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
- const MachineLoopInfo &mli)
- : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli),
- TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr),
- LastSplitPoint(MF.getNumBlockIDs()) {}
+InsertPointAnalysis::InsertPointAnalysis(const LiveIntervals &lis,
+ unsigned BBNum)
+ : LIS(lis), LastInsertPoint(BBNum) {}
-void SplitAnalysis::clear() {
- UseSlots.clear();
- UseBlocks.clear();
- ThroughBlocks.clear();
- CurLI = nullptr;
- DidRepairRange = false;
-}
+SlotIndex
+InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB) {
+ unsigned Num = MBB.getNumber();
+ std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num];
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB);
-SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
- const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
- // FIXME: Handle multiple EH pad successors.
- const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
- std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
- SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+ SmallVector<const MachineBasicBlock *, 1> EHPadSuccessors;
+ for (const MachineBasicBlock *SMBB : MBB.successors())
+ if (SMBB->isEHPad())
+ EHPadSuccessors.push_back(SMBB);
- // Compute split points on the first call. The pair is independent of the
+ // Compute insert points on the first call. The pair is independent of the
// current live interval.
- if (!LSP.first.isValid()) {
- MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
- if (FirstTerm == MBB->end())
- LSP.first = MBBEnd;
+ if (!LIP.first.isValid()) {
+ MachineBasicBlock::const_iterator FirstTerm = MBB.getFirstTerminator();
+ if (FirstTerm == MBB.end())
+ LIP.first = MBBEnd;
else
- LSP.first = LIS.getInstructionIndex(FirstTerm);
+ LIP.first = LIS.getInstructionIndex(*FirstTerm);
// If there is a landing pad successor, also find the call instruction.
- if (!LPad)
- return LSP.first;
+ if (EHPadSuccessors.empty())
+ return LIP.first;
// There may not be a call instruction (?) in which case we ignore LPad. 
- LSP.second = LSP.first;
- for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
+ LIP.second = LIP.first;
+ for (MachineBasicBlock::const_iterator I = MBB.end(), E = MBB.begin();
I != E;) {
--I;
if (I->isCall()) {
- LSP.second = LIS.getInstructionIndex(I);
+ LIP.second = LIS.getInstructionIndex(*I);
break;
}
}
}
- // If CurLI is live into a landing pad successor, move the last split point
+ // If CurLI is live into a landing pad successor, move the last insert point
// back to the call that may throw.
- if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad))
- return LSP.first;
+ if (!LIP.second)
+ return LIP.first;
+
+ if (none_of(EHPadSuccessors, [&](const MachineBasicBlock *EHPad) {
+ return LIS.isLiveInToMBB(CurLI, EHPad);
+ }))
+ return LIP.first;
// Find the value leaving MBB.
- const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd);
+ const VNInfo *VNI = CurLI.getVNInfoBefore(MBBEnd);
if (!VNI)
- return LSP.first;
+ return LIP.first;
// If the value leaving MBB was defined after the call in MBB, it can't
// really be live-in to the landing pad. This can happen if the landing pad
// has a PHI, and this register is undef on the exceptional edge.
// <rdar://problem/10664933>
- if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd)
- return LSP.first;
+ if (!SlotIndex::isEarlierInstr(VNI->def, LIP.second) && VNI->def < MBBEnd)
+ return LIP.first;
// Value is properly live-in to the landing pad.
- // Only allow splits before the call.
- return LSP.second;
+ // Only allow inserts before the call.
+ return LIP.second;
}
MachineBasicBlock::iterator
-SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) {
- SlotIndex LSP = getLastSplitPoint(MBB->getNumber());
- if (LSP == LIS.getMBBEndIdx(MBB))
- return MBB->end();
- return LIS.getInstructionFromIndex(LSP);
+InsertPointAnalysis::getLastInsertPointIter(const LiveInterval &CurLI,
+ MachineBasicBlock &MBB) {
+ SlotIndex LIP = getLastInsertPoint(CurLI, MBB);
+ if (LIP == LIS.getMBBEndIdx(&MBB))
+ return MBB.end();
+ return LIS.getInstructionFromIndex(LIP);
+}
+
+//===----------------------------------------------------------------------===//
+// Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+ const MachineLoopInfo &mli)
+ : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli),
+ TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr),
+ IPA(lis, MF.getNumBlockIDs()) {}
+
+void SplitAnalysis::clear() {
+ UseSlots.clear();
+ UseBlocks.clear();
+ ThroughBlocks.clear();
+ CurLI = nullptr;
+ DidRepairRange = false;
}
/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
@@ -129,7 +149,7 @@ void SplitAnalysis::analyzeUses() {
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg))
if (!MO.isUndef())
- UseSlots.push_back(LIS.getInstructionIndex(MO.getParent()).getRegSlot());
+ UseSlots.push_back(LIS.getInstructionIndex(*MO.getParent()).getRegSlot());
array_pod_sort(UseSlots.begin(), UseSlots.end());
@@ -318,11 +338,13 @@ void SplitAnalysis::analyze(const LiveInterval *li) {
//===----------------------------------------------------------------------===//
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. 
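computeLastInsertPoint above caches two positions per block: the first terminator, which bounds insertion in the common case, and the last call in the block, which becomes the bound when the interval is live into an EH pad successor, because anything inserted after the call would never execute on the exceptional path. A condensed sketch of that decision under a deliberately simplified block model (Insn, Block, and lastInsertPoint are illustrative names, not the LLVM API):

#include <vector>

struct Insn {
  bool IsTerminator = false;
  bool IsCall = false;
};

struct Block {
  std::vector<Insn> Insns;              // in program order
  bool HasLiveInEHPadSuccessor = false; // interval live into a landing pad?
};

// Index of the last position where a split or spill may be inserted:
// normally the first terminator, but if the value is live into an EH
// pad we must stay before the call that may throw.
static size_t lastInsertPoint(const Block &B) {
  size_t FirstTerm = B.Insns.size();
  for (size_t I = 0; I != B.Insns.size(); ++I)
    if (B.Insns[I].IsTerminator) {
      FirstTerm = I;
      break;
    }
  if (!B.HasLiveInEHPadSuccessor)
    return FirstTerm;
  // Scan backwards for the call feeding the exceptional edge.
  for (size_t I = B.Insns.size(); I-- > 0;)
    if (B.Insns[I].IsCall)
      return I;
  return FirstTerm; // no call found: ignore the EH pad, as above
}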
-SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, +SplitEditor::SplitEditor(SplitAnalysis &sa, AliasAnalysis &aa, + LiveIntervals &lis, VirtRegMap &vrm, MachineDominatorTree &mdt, MachineBlockFrequencyInfo &mbfi) - : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()), - MDT(mdt), TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()), + : SA(sa), AA(aa), LIS(lis), VRM(vrm), + MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt), + TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()), TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()), MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition), RegAssign(Allocator) {} @@ -347,7 +369,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void SplitEditor::dump() const { +LLVM_DUMP_METHOD void SplitEditor::dump() const { if (RegAssign.empty()) { dbgs() << " empty\n"; return; @@ -430,16 +452,22 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, bool Late = RegIdx != 0; // Attempt cheap-as-a-copy rematerialization. + unsigned Original = VRM.getOriginal(Edit->get(RegIdx)); + LiveInterval &OrigLI = LIS.getInterval(Original); + VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx); LiveRangeEdit::Remat RM(ParentVNI); - if (Edit->canRematerializeAt(RM, UseIdx, true)) { + RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def); + + if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) { Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); ++NumRemats; } else { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); - Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) - .getRegSlot(); + Def = LIS.getSlotIndexes() + ->insertMachineInstrInMaps(*CopyMI, Late) + .getRegSlot(); ++NumCopies; } @@ -638,7 +666,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { DEBUG(dbgs() << "Removing " << Def << '\t' << *MI); LIS.removeVRegDefAt(*LI, Def); - LIS.RemoveMachineInstrFromMaps(MI); + LIS.RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); // Adjust RegAssign if a register assignment is killed at Def. We want to @@ -654,7 +682,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def)); } else { - SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot(); + SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot(); DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); AssignI.setStop(Kill); } @@ -715,7 +743,62 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, } } -void SplitEditor::hoistCopiesForSize() { +void SplitEditor::computeRedundantBackCopies( + DenseSet<unsigned> &NotToHoistSet, SmallVectorImpl<VNInfo *> &BackCopies) { + LiveInterval *LI = &LIS.getInterval(Edit->get(0)); + LiveInterval *Parent = &Edit->getParent(); + SmallVector<SmallPtrSet<VNInfo *, 8>, 8> EqualVNs(Parent->getNumValNums()); + SmallPtrSet<VNInfo *, 8> DominatedVNIs; + + // Aggregate VNIs having the same value as ParentVNI. + for (VNInfo *VNI : LI->valnos) { + if (VNI->isUnused()) + continue; + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); + EqualVNs[ParentVNI->id].insert(VNI); + } + + // For VNI aggregation of each ParentVNI, collect dominated, i.e., + // redundant VNIs to BackCopies. 
+ for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) { + VNInfo *ParentVNI = Parent->getValNumInfo(i); + if (!NotToHoistSet.count(ParentVNI->id)) + continue; + SmallPtrSetIterator<VNInfo *> It1 = EqualVNs[ParentVNI->id].begin(); + SmallPtrSetIterator<VNInfo *> It2 = It1; + for (; It1 != EqualVNs[ParentVNI->id].end(); ++It1) { + It2 = It1; + for (++It2; It2 != EqualVNs[ParentVNI->id].end(); ++It2) { + if (DominatedVNIs.count(*It1) || DominatedVNIs.count(*It2)) + continue; + + MachineBasicBlock *MBB1 = LIS.getMBBFromIndex((*It1)->def); + MachineBasicBlock *MBB2 = LIS.getMBBFromIndex((*It2)->def); + if (MBB1 == MBB2) { + DominatedVNIs.insert((*It1)->def < (*It2)->def ? (*It2) : (*It1)); + } else if (MDT.dominates(MBB1, MBB2)) { + DominatedVNIs.insert(*It2); + } else if (MDT.dominates(MBB2, MBB1)) { + DominatedVNIs.insert(*It1); + } + } + } + if (!DominatedVNIs.empty()) { + forceRecompute(0, ParentVNI); + for (auto VNI : DominatedVNIs) { + BackCopies.push_back(VNI); + } + DominatedVNIs.clear(); + } + } +} + +/// For SM_Size mode, find a common dominator for all the back-copies for +/// the same ParentVNI and hoist the backcopies to the dominator BB. +/// For SM_Speed mode, if the common dominator is hot and it is not beneficial +/// to do the hoisting, simply remove the dominated backcopies for the same +/// ParentVNI. +void SplitEditor::hoistCopies() { // Get the complement interval, always RegIdx 0. LiveInterval *LI = &LIS.getInterval(Edit->get(0)); LiveInterval *Parent = &Edit->getParent(); @@ -724,6 +807,11 @@ void SplitEditor::hoistCopiesForSize() { // indexed by ParentVNI->id. typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair; SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums()); + // The total cost of all the back-copies for each ParentVNI. + SmallVector<BlockFrequency, 8> Costs(Parent->getNumValNums()); + // The ParentVNI->id set for which hoisting back-copies are not beneficial + // for Speed. + DenseSet<unsigned> NotToHoistSet; // Find the nearest common dominator for parent values with multiple // back-copies. If a single back-copy dominates, put it in DomPair.second. @@ -739,6 +827,7 @@ void SplitEditor::hoistCopiesForSize() { continue; MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def); + DomPair &Dom = NearestDom[ParentVNI->id]; // Keep directly defined parent values. This is either a PHI or an @@ -773,6 +862,7 @@ void SplitEditor::hoistCopiesForSize() { else if (Near != Dom.first) // None dominate. Hoist to common dominator, need new def. Dom = DomPair(Near, SlotIndex()); + Costs[ParentVNI->id] += MBFI.getBlockFreq(ValMBB); } DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def @@ -791,6 +881,11 @@ void SplitEditor::hoistCopiesForSize() { MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def); // Get a less loopy dominator than Dom.first. 
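computeRedundantBackCopies above compares every pair of back-copies that carry the same parent value and marks the dominated one redundant; when both copies sit in the same block, the later one loses. A sketch of that pairwise pruning with a hypothetical dominates() oracle standing in for MachineDominatorTree (the types here are illustrative, not SplitKit's):

#include <cstddef>
#include <functional>
#include <set>
#include <vector>

struct Copy {
  unsigned Block; // block containing the back-copy
  unsigned Index; // position within that block
};

// Collect the indices of copies dominated by another copy of the same
// value; those are redundant and can be removed
// (cf. computeRedundantBackCopies above).
static std::set<std::size_t>
findDominatedCopies(const std::vector<Copy> &Copies,
                    const std::function<bool(unsigned, unsigned)> &dominates) {
  std::set<std::size_t> Dominated;
  for (std::size_t I = 0; I != Copies.size(); ++I)
    for (std::size_t J = I + 1; J != Copies.size(); ++J) {
      if (Dominated.count(I) || Dominated.count(J))
        continue;
      if (Copies[I].Block == Copies[J].Block)
        Dominated.insert(Copies[I].Index < Copies[J].Index ? J : I);
      else if (dominates(Copies[I].Block, Copies[J].Block))
        Dominated.insert(J);
      else if (dominates(Copies[J].Block, Copies[I].Block))
        Dominated.insert(I);
    }
  return Dominated;
}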
Dom.first = findShallowDominator(Dom.first, DefMBB); + if (SpillMode == SM_Speed && + MBFI.getBlockFreq(Dom.first) > Costs[ParentVNI->id]) { + NotToHoistSet.insert(ParentVNI->id); + continue; + } SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot(); Dom.second = defFromParent(0, ParentVNI, Last, *Dom.first, @@ -805,11 +900,18 @@ void SplitEditor::hoistCopiesForSize() { continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); const DomPair &Dom = NearestDom[ParentVNI->id]; - if (!Dom.first || Dom.second == VNI->def) + if (!Dom.first || Dom.second == VNI->def || + NotToHoistSet.count(ParentVNI->id)) continue; BackCopies.push_back(VNI); forceRecompute(0, ParentVNI); } + + // If it is not beneficial to hoist all the BackCopies, simply remove + // redundant BackCopies in speed mode. + if (SpillMode == SM_Speed && !NotToHoistSet.empty()) + computeRedundantBackCopies(NotToHoistSet, BackCopies); + removeBackCopies(BackCopies); } @@ -924,12 +1026,22 @@ bool SplitEditor::transferValues() { } void SplitEditor::extendPHIKillRanges() { - // Extend live ranges to be live-out for successor PHI values. + // Extend live ranges to be live-out for successor PHI values. for (const VNInfo *PHIVNI : Edit->getParent().valnos) { if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) continue; unsigned RegIdx = RegAssign.lookup(PHIVNI->def); LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); + + // Check whether PHI is dead. + const LiveRange::Segment *Segment = LR.getSegmentContaining(PHIVNI->def); + assert(Segment != nullptr && "Missing segment for VNI"); + if (Segment->end == PHIVNI->def.getDeadSlot()) { + // This is a dead PHI. Remove it. + LR.removeSegment(*Segment, true); + continue; + } + LiveRangeCalc &LRC = getLRCalc(RegIdx); MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), @@ -964,7 +1076,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // <undef> operands don't really read the register, so it doesn't matter // which register we choose. When the use operand is tied to a def, we must // use the same register as the def, so just do that always. - SlotIndex Idx = LIS.getInstructionIndex(MI); + SlotIndex Idx = LIS.getInstructionIndex(*MI); if (MO.isDef() || MO.isUndef()) Idx = Idx.getRegSlot(MO.isEarlyClobber()); @@ -1003,6 +1115,8 @@ void SplitEditor::deleteRematVictims() { // Dead defs end at the dead slot. if (S.end != S.valno->def.getDeadSlot()) continue; + if (S.valno->isPHIDef()) + continue; MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def); assert(MI && "Missing instruction for dead def"); MI->addRegisterDead(LI->reg, &TRI); @@ -1018,7 +1132,7 @@ void SplitEditor::deleteRematVictims() { if (Dead.empty()) return; - Edit->eliminateDeadDefs(Dead); + Edit->eliminateDeadDefs(Dead, None, &AA); } void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { @@ -1047,22 +1161,22 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { // Leave all back-copies as is. break; case SM_Size: - hoistCopiesForSize(); - break; case SM_Speed: - llvm_unreachable("Spill mode 'speed' not implemented yet"); + // hoistCopies will behave differently between size and speed. + hoistCopies(); } // Transfer the simply mapped values, check if any are skipped. bool Skipped = transferValues(); + + // Rewrite virtual registers, possibly extending ranges. + rewriteAssigned(Skipped); + if (Skipped) extendPHIKillRanges(); else ++NumSimple; - // Rewrite virtual registers, possibly extending ranges. 
- rewriteAssigned(Skipped);
-
// Delete defs that were rematted everywhere.
if (Skipped)
deleteRematVictims();
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index 69c65ff..a968494 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -18,6 +18,7 @@
#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -37,6 +38,40 @@ class VirtRegMap;
class VNInfo;
class raw_ostream;
+/// Determines the latest safe point in a block in which we can insert a split,
+/// spill or other instruction related to CurLI.
+class LLVM_LIBRARY_VISIBILITY InsertPointAnalysis {
+private:
+ const LiveIntervals &LIS;
+
+ /// Last legal insert point in each basic block in the current function.
+ /// The first entry is the first terminator, the second entry is the
+ /// last valid point to insert a split or spill for a variable that is
+ /// live into a landing pad successor.
+ SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastInsertPoint;
+
+ SlotIndex computeLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB);
+
+public:
+ InsertPointAnalysis(const LiveIntervals &lis, unsigned BBNum);
+
+ /// Return the base index of the last valid insert point for \p CurLI in \p MBB.
+ SlotIndex getLastInsertPoint(const LiveInterval &CurLI,
+ const MachineBasicBlock &MBB) {
+ unsigned Num = MBB.getNumber();
+ // Inline the common simple case.
+ if (LastInsertPoint[Num].first.isValid() &&
+ !LastInsertPoint[Num].second.isValid())
+ return LastInsertPoint[Num].first;
+ return computeLastInsertPoint(CurLI, MBB);
+ }
+
+ /// Returns the last insert point as an iterator for \p CurLI in \p MBB.
+ MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI,
+ MachineBasicBlock &MBB);
+};
+
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
/// opportunities.
class LLVM_LIBRARY_VISIBILITY SplitAnalysis {
@@ -83,15 +118,12 @@ private:
// Current live interval.
const LiveInterval *CurLI;
+ /// Insert Point Analysis.
+ InsertPointAnalysis IPA;
+
// Sorted slot indexes of using instructions.
SmallVector<SlotIndex, 8> UseSlots;
- /// LastSplitPoint - Last legal split point in each basic block in the current
- /// function. The first entry is the first terminator, the second entry is the
- /// last valid split point for a variable that is live in to a landing pad
- /// successor.
- SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint;
-
/// UseBlocks - Blocks where CurLI has uses.
SmallVector<BlockInfo, 8> UseBlocks;
@@ -108,8 +140,6 @@ private:
/// DidRepairRange - analyze was forced to shrinkToUses().
bool DidRepairRange;
- SlotIndex computeLastSplitPoint(unsigned Num);
-
// Summarize statistics by counting instructions using CurLI.
void analyzeUses();
@@ -136,19 +166,6 @@ public:
/// getParent - Return the last analyzed interval.
const LiveInterval &getParent() const { return *CurLI; }
- /// getLastSplitPoint - Return the base index of the last valid split point
- /// in the basic block numbered Num.
- SlotIndex getLastSplitPoint(unsigned Num) {
- // Inline the common simple case.
- if (LastSplitPoint[Num].first.isValid() &&
- !LastSplitPoint[Num].second.isValid())
- return LastSplitPoint[Num].first;
- return computeLastSplitPoint(Num);
- }
-
- /// getLastSplitPointIter - Returns the last split point as an iterator. 
- MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*); - /// isOriginalEndpoint - Return true if the original live range was killed or /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def, /// and 'use' for an early-clobber def. @@ -194,6 +211,14 @@ public: /// @param BI The block to be isolated. /// @param SingleInstrs True when single instructions should be isolated. bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const; + + SlotIndex getLastSplitPoint(unsigned Num) { + return IPA.getLastInsertPoint(*CurLI, *MF.getBlockNumbered(Num)); + } + + MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) { + return IPA.getLastInsertPointIter(*CurLI, *BB); + } }; @@ -210,6 +235,7 @@ public: /// class LLVM_LIBRARY_VISIBILITY SplitEditor { SplitAnalysis &SA; + AliasAnalysis &AA; LiveIntervals &LIS; VirtRegMap &VRM; MachineRegisterInfo &MRI; @@ -329,9 +355,14 @@ private: MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB, MachineBasicBlock *DefMBB); - /// hoistCopiesForSize - Hoist back-copies to the complement interval in a - /// way that minimizes code size. This implements the SM_Size spill mode. - void hoistCopiesForSize(); + /// Find out all the backCopies dominated by others. + void computeRedundantBackCopies(DenseSet<unsigned> &NotToHoistSet, + SmallVectorImpl<VNInfo *> &BackCopies); + + /// Hoist back-copies to the complement interval. It tries to hoist all + /// the back-copies to one BB if it is beneficial, or else simply remove + /// redundant backcopies dominated by others. + void hoistCopies(); /// transferValues - Transfer values to the new ranges. /// Return true if any ranges were skipped. @@ -350,8 +381,9 @@ private: public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. - SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, - MachineDominatorTree&, MachineBlockFrequencyInfo &); + SplitEditor(SplitAnalysis &SA, AliasAnalysis &AA, LiveIntervals&, + VirtRegMap&, MachineDominatorTree&, + MachineBlockFrequencyInfo &); /// reset - Prepare for a new split. 
void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index 7b52038..87cd470 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -21,33 +21,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -67,18 +64,180 @@ DisableColoring("no-stack-coloring", /// The user may write code that uses allocas outside of the declared lifetime /// zone. This can happen when the user returns a reference to a local /// data-structure. We can detect these cases and decide not to optimize the -/// code. If this flag is enabled, we try to save the user. +/// code. If this flag is enabled, we try to save the user. This option +/// is treated as overriding LifetimeStartOnFirstUse below. static cl::opt<bool> ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken")); +/// Enable enhanced dataflow scheme for lifetime analysis (treat first +/// use of stack slot as start of slot lifetime, as opposed to looking +/// for LIFETIME_START marker). See "Implementation notes" below for +/// more info. +static cl::opt<bool> +LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", + cl::init(true), cl::Hidden, + cl::desc("Treat stack lifetimes as starting on first use, not on START marker.")); + + STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); +// +// Implementation Notes: +// --------------------- +// +// Consider the following motivating example: +// +// int foo() { +// char b1[1024], b2[1024]; +// if (...) { +// char b3[1024]; +// <uses of b1, b3>; +// return x; +// } else { +// char b4[1024], b5[1024]; +// <uses of b2, b4, b5>; +// return y; +// } +// } +// +// In the code above, "b3" and "b4" are declared in distinct lexical +// scopes, meaning that it is easy to prove that they can share the +// same stack slot. 
Variables "b1" and "b2" are declared in the same
+// scope, meaning that from a lexical point of view, their lifetimes
+// overlap. From a control flow point of view, however, the two
+// variables are accessed in disjoint regions of the CFG, thus it
+// should be possible for them to share the same stack slot. An ideal
+// stack allocation for the function above would look like:
+//
+// slot 0: b1, b2
+// slot 1: b3, b4
+// slot 2: b5
+//
+// Achieving this allocation is tricky, however, due to the way
+// lifetime markers are inserted. Here is a simplified view of the
+// control flow graph for the code above:
+//
+// +------ block 0 -------+
+// 0| LIFETIME_START b1, b2 |
+// 1| <test 'if' condition> |
+// +-----------------------+
+// ./ \.
+// +------ block 1 -------+ +------ block 2 -------+
+// 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
+// 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
+// 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
+// +-----------------------+ +-----------------------+
+// \. /.
+// +------ block 3 -------+
+// 8| <cleanupcode> |
+// 9| LIFETIME_END b1, b2 |
+// 10| return |
+// +-----------------------+
+//
+// If we create live intervals for the variables above strictly based
+// on the lifetime markers, we'll get the set of intervals on the
+// left. If we ignore the lifetime start markers and instead treat a
+// variable's lifetime as beginning with the first reference to the
+// var, then we get the intervals on the right.
+//
+// LIFETIME_START First Use
+// b1: [0,9] [3,4] [8,9]
+// b2: [0,9] [6,9]
+// b3: [2,4] [3,4]
+// b4: [5,7] [6,7]
+// b5: [5,7] [6,7]
+//
+// For the intervals on the left, the best we can do is overlap two
+// variables (b3 and b4, for example); this gives us a stack size of
+// 4*1024 bytes, not ideal. When treating first-use as the start of a
+// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
+// byte stack (better).
+//
+// Relying entirely on first-use of stack slots is problematic,
+// however, due to the fact that optimizations can sometimes migrate
+// uses of a variable outside of its lifetime start/end region. Here
+// is an example:
+//
+// int bar() {
+// char b1[1024], b2[1024];
+// if (...) {
+// <uses of b2>
+// return y;
+// } else {
+// <uses of b1>
+// while (...) {
+// char b3[1024];
+// <uses of b3>
+// }
+// }
+// }
+//
+// Before optimization, the control flow graph for the code above
+// might look like the following:
+//
+// +------ block 0 -------+
+// 0| LIFETIME_START b1, b2 |
+// 1| <test 'if' condition> |
+// +-----------------------+
+// ./ \.
+// +------ block 1 -------+ +------- block 2 -------+
+// 2| <uses of b2> | 3| <uses of b1> |
+// +-----------------------+ +-----------------------+
+// | |
+// | +------- block 3 -------+ <-\.
+// | 4| <while condition> | |
+// | +-----------------------+ |
+// | / | |
+// | / +------- block 4 -------+
+// \ / 5| LIFETIME_START b3 | |
+// \ / 6| <uses of b3> | |
+// \ / 7| LIFETIME_END b3 | |
+// \ | +------------------------+ |
+// \ | \ /
+// +------ block 5 -----+ \---------------
+// 8| <cleanupcode> |
+// 9| LIFETIME_END b1, b2 |
+// 10| return |
+// +---------------------+
+//
+// During optimization, however, it can happen that an instruction
+// computing an address in "b3" (for example, a loop-invariant GEP) is
+// hoisted up out of the loop from block 4 to block 2. [Note that
+// this is not an actual load from the stack, only an instruction that
+// computes the address to be loaded]. 
If this happens, there is now a
+// path leading from the first use of b3 to the return instruction
+// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
+// now larger than if we were computing live intervals strictly based
+// on lifetime markers. In the example above, this lengthened lifetime
+// would mean that it would appear illegal to overlap b3 with b2.
+//
+// To deal with such cases, the code in ::collectMarkers() below
+// tries to identify "degenerate" slots -- those slots where on a single
+// forward pass through the CFG we encounter a first reference to slot
+// K before we hit the slot K lifetime start marker. For such slots,
+// we fall back on using the lifetime start marker as the beginning of
+// the variable's lifetime. NB: with this implementation, slots can
+// appear degenerate in cases where there is unstructured control flow:
+//
+// if (q) goto mid;
+// if (x > 9) {
+// int b[100];
+// memcpy(&b[0], ...);
+// mid: b[k] = ...;
+// abc(&b);
+// }
+//
+// If, in the RPO ordering chosen to walk the CFG, we happen to visit the b[k]
+// block before visiting the memcpy block (which contains the lifetime start
+// for "b"), then it will appear that 'b' has a degenerate lifetime.
+//
+
//===----------------------------------------------------------------------===//
// StackColoring Pass
//===----------------------------------------------------------------------===//
@@ -126,6 +285,17 @@ class StackColoring : public MachineFunctionPass {
/// once the coloring is done.
SmallVector<MachineInstr*, 8> Markers;
+ /// Record the FI slots for which we have seen some sort of
+ /// lifetime marker (either start or end).
+ BitVector InterestingSlots;
+
+ /// FI slots that need to be handled conservatively (for these
+ /// slots lifetime-start-on-first-use is disabled).
+ BitVector ConservativeSlots;
+
+ /// Number of iterations taken during data flow analysis.
+ unsigned NumIterations;
+
public:
static char ID;
StackColoring() : MachineFunctionPass(ID) {
@@ -137,6 +307,9 @@ public:
private:
/// Debug.
void dump() const;
+ void dumpIntervals() const;
+ void dumpBB(MachineBasicBlock *MBB) const;
+ void dumpBV(const char *tag, const BitVector &BV) const;
/// Removes all of the lifetime marker instructions from the function.
/// \returns true if any markers were removed.
@@ -153,6 +326,25 @@ private:
/// in and out blocks.
void calculateLocalLiveness();
+ /// Returns TRUE if we're using the first-use-begins-lifetime method for
+ /// this slot (if FALSE, then the start marker is treated as start of lifetime).
+ bool applyFirstUse(int Slot) {
+ if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas)
+ return false;
+ if (ConservativeSlots.test(Slot))
+ return false;
+ return true;
+ }
+
+ /// Examines the specified instruction and returns TRUE if the instruction
+ /// represents the start or end of an interesting lifetime. The slot or slots
+ /// starting or ending are added to the vector "slots" and "isStart" is set
+ /// accordingly.
+ /// \returns True if inst contains a lifetime start or end
+ bool isLifetimeStartOrEnd(const MachineInstr &MI,
+ SmallVector<int, 4> &slots,
+ bool &isStart);
+
/// Construct the LiveIntervals for the slots.
void calculateLiveIntervals(unsigned NumSlots);
@@ -170,7 +362,10 @@ private:
/// Map entries which point to other entries to their destination.
/// A->B->C becomes A->C. 
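The expungeSlotMap step documented above is ordinary path compression on the slot remapping: every chain A->B->C is collapsed so A points directly at the final slot. A minimal sketch over std::map, assuming the remap is acyclic, which holds for a merge map that only ever redirects a dying slot onto its surviving replacement (hypothetical signature, not the pass's DenseMap-based one):

#include <map>

// Collapse remap chains so each source slot maps straight to its final
// destination: A->B->C becomes A->C (cf. expungeSlotMap above).
static void compressSlotMap(std::map<int, int> &SlotRemap) {
  for (auto &Entry : SlotRemap) {
    int Target = Entry.second;
    // Follow the chain to its end; acyclicity is assumed as noted above.
    while (SlotRemap.count(Target))
      Target = SlotRemap.at(Target);
    Entry.second = Target;
  }
}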
- void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); + void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); + + /// Used in collectMarkers + typedef DenseMap<const MachineBasicBlock*, BitVector> BlockBitVecMap; }; } // end anonymous namespace @@ -179,55 +374,202 @@ char &llvm::StackColoringID = StackColoring::ID; INITIALIZE_PASS_BEGIN(StackColoring, "stack-coloring", "Merge disjoint stack slots", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(StackColoring, "stack-coloring", "Merge disjoint stack slots", false, false) void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineDominatorTree>(); - AU.addPreserved<MachineDominatorTree>(); AU.addRequired<SlotIndexes>(); AU.addRequired<StackProtector>(); MachineFunctionPass::getAnalysisUsage(AU); } -void StackColoring::dump() const { - for (MachineBasicBlock *MBB : depth_first(MF)) { - DEBUG(dbgs() << "Inspecting block #" << BasicBlocks.lookup(MBB) << " [" - << MBB->getName() << "]\n"); +#ifndef NDEBUG - LivenessMap::const_iterator BI = BlockLiveness.find(MBB); - assert(BI != BlockLiveness.end() && "Block not found"); - const BlockLifetimeInfo &BlockInfo = BI->second; +LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag, + const BitVector &BV) const { + DEBUG(dbgs() << tag << " : { "); + for (unsigned I = 0, E = BV.size(); I != E; ++I) + DEBUG(dbgs() << BV.test(I) << " "); + DEBUG(dbgs() << "}\n"); +} + +LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const { + LivenessMap::const_iterator BI = BlockLiveness.find(MBB); + assert(BI != BlockLiveness.end() && "Block not found"); + const BlockLifetimeInfo &BlockInfo = BI->second; - DEBUG(dbgs()<<"BEGIN : {"); - for (unsigned i=0; i < BlockInfo.Begin.size(); ++i) - DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" "); - DEBUG(dbgs()<<"}\n"); + dumpBV("BEGIN", BlockInfo.Begin); + dumpBV("END", BlockInfo.End); + dumpBV("LIVE_IN", BlockInfo.LiveIn); + dumpBV("LIVE_OUT", BlockInfo.LiveOut); +} - DEBUG(dbgs()<<"END : {"); - for (unsigned i=0; i < BlockInfo.End.size(); ++i) - DEBUG(dbgs()<<BlockInfo.End.test(i)<<" "); +LLVM_DUMP_METHOD void StackColoring::dump() const { + for (MachineBasicBlock *MBB : depth_first(MF)) { + DEBUG(dbgs() << "Inspecting block #" << MBB->getNumber() << " [" + << MBB->getName() << "]\n"); + DEBUG(dumpBB(MBB)); + } +} - DEBUG(dbgs()<<"}\n"); +LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const { + for (unsigned I = 0, E = Intervals.size(); I != E; ++I) { + DEBUG(dbgs() << "Interval[" << I << "]:\n"); + DEBUG(Intervals[I]->dump()); + } +} - DEBUG(dbgs()<<"LIVE_IN: {"); - for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i) - DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" "); +#endif // not NDEBUG + +static inline int getStartOrEndSlot(const MachineInstr &MI) +{ + assert((MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) && + "Expected LIFETIME_START or LIFETIME_END op"); + const MachineOperand &MO = MI.getOperand(0); + int Slot = MO.getIndex(); + if (Slot >= 0) + return Slot; + return -1; +} - DEBUG(dbgs()<<"}\n"); - DEBUG(dbgs()<<"LIVEOUT: {"); - for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i) - DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" "); - DEBUG(dbgs()<<"}\n"); +// +// At the moment the only way to end a variable lifetime is with +// a VARIABLE_LIFETIME op (which can't contain a start). 
If things +// change and the IR allows for a single inst that both begins +// and ends lifetime(s), this interface will need to be reworked. +// +bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, + SmallVector<int, 4> &slots, + bool &isStart) +{ + if (MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) { + int Slot = getStartOrEndSlot(MI); + if (Slot < 0) + return false; + if (!InterestingSlots.test(Slot)) + return false; + slots.push_back(Slot); + if (MI.getOpcode() == TargetOpcode::LIFETIME_END) { + isStart = false; + return true; + } + if (! applyFirstUse(Slot)) { + isStart = true; + return true; + } + } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) { + if (! MI.isDebugValue()) { + bool found = false; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isFI()) + continue; + int Slot = MO.getIndex(); + if (Slot<0) + continue; + if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) { + slots.push_back(Slot); + found = true; + } + } + if (found) { + isStart = true; + return true; + } + } } + return false; } -unsigned StackColoring::collectMarkers(unsigned NumSlot) { +unsigned StackColoring::collectMarkers(unsigned NumSlot) +{ unsigned MarkersFound = 0; - // Scan the function to find all lifetime markers. + BlockBitVecMap SeenStartMap; + InterestingSlots.clear(); + InterestingSlots.resize(NumSlot); + ConservativeSlots.clear(); + ConservativeSlots.resize(NumSlot); + + // number of start and end lifetime ops for each slot + SmallVector<int, 8> NumStartLifetimes(NumSlot, 0); + SmallVector<int, 8> NumEndLifetimes(NumSlot, 0); + + // Step 1: collect markers and populate the "InterestingSlots" + // and "ConservativeSlots" sets. + for (MachineBasicBlock *MBB : depth_first(MF)) { + + // Compute the set of slots for which we've seen a START marker but have + // not yet seen an END marker at this point in the walk (e.g. on entry + // to this bb). + BitVector BetweenStartEnd; + BetweenStartEnd.resize(NumSlot); + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI); + if (I != SeenStartMap.end()) { + BetweenStartEnd |= I->second; + } + } + + // Walk the instructions in the block to look for start/end ops. + for (MachineInstr &MI : *MBB) { + if (MI.getOpcode() == TargetOpcode::LIFETIME_START || + MI.getOpcode() == TargetOpcode::LIFETIME_END) { + int Slot = getStartOrEndSlot(MI); + if (Slot < 0) + continue; + InterestingSlots.set(Slot); + if (MI.getOpcode() == TargetOpcode::LIFETIME_START) { + BetweenStartEnd.set(Slot); + NumStartLifetimes[Slot] += 1; + } else { + BetweenStartEnd.reset(Slot); + NumEndLifetimes[Slot] += 1; + } + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs() << "Found a lifetime "); + DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START + ? "start" + : "end")); + DEBUG(dbgs() << " marker for slot #" << Slot); + DEBUG(dbgs() << " with allocation: " << Allocation->getName() + << "\n"); + } + Markers.push_back(&MI); + MarkersFound += 1; + } else { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isFI()) + continue; + int Slot = MO.getIndex(); + if (Slot < 0) + continue; + if (! 
BetweenStartEnd.test(Slot)) { + ConservativeSlots.set(Slot); + } + } + } + } + BitVector &SeenStart = SeenStartMap[MBB]; + SeenStart |= BetweenStartEnd; + } + if (!MarkersFound) { + return 0; + } + + // PR27903: slots with multiple start or end lifetime ops are not + // safe to enable for "lifetime-start-on-first-use". + for (unsigned slot = 0; slot < NumSlot; ++slot) + if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1) + ConservativeSlots.set(slot); + DEBUG(dumpBV("Conservative slots", ConservativeSlots)); + + // Step 2: compute begin/end sets for each block + // NOTE: We use a reverse-post-order iteration to ensure that we obtain a // deterministic numbering, and because we'll need a post-order iteration // later for solving the liveness dataflow problem. @@ -243,35 +585,33 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { BlockInfo.Begin.resize(NumSlot); BlockInfo.End.resize(NumSlot); + SmallVector<int, 4> slots; for (MachineInstr &MI : *MBB) { - if (MI.getOpcode() != TargetOpcode::LIFETIME_START && - MI.getOpcode() != TargetOpcode::LIFETIME_END) - continue; - - Markers.push_back(&MI); - - bool IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START; - const MachineOperand &MO = MI.getOperand(0); - unsigned Slot = MO.getIndex(); - - MarkersFound++; - - const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); - if (Allocation) { - DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<< - " with allocation: "<< Allocation->getName()<<"\n"); - } - - if (IsStart) { - BlockInfo.Begin.set(Slot); - } else { - if (BlockInfo.Begin.test(Slot)) { - // Allocas that start and end within a single block are handled - // specially when computing the LiveIntervals to avoid pessimizing - // the liveness propagation. - BlockInfo.Begin.reset(Slot); - } else { + bool isStart = false; + slots.clear(); + if (isLifetimeStartOrEnd(MI, slots, isStart)) { + if (!isStart) { + assert(slots.size() == 1 && "unexpected: MI ends multiple slots"); + int Slot = slots[0]; + if (BlockInfo.Begin.test(Slot)) { + BlockInfo.Begin.reset(Slot); + } BlockInfo.End.set(Slot); + } else { + for (auto Slot : slots) { + DEBUG(dbgs() << "Found a use of slot #" << Slot); + DEBUG(dbgs() << " at BB#" << MBB->getNumber() << " index "); + DEBUG(Indexes->getInstructionIndex(MI).print(dbgs())); + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs() << " with allocation: "<< Allocation->getName()); + } + DEBUG(dbgs() << "\n"); + if (BlockInfo.End.test(Slot)) { + BlockInfo.End.reset(Slot); + } + BlockInfo.Begin.set(Slot); + } } } } @@ -282,90 +622,56 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { return MarkersFound; } -void StackColoring::calculateLocalLiveness() { - // Perform a standard reverse dataflow computation to solve for - // global liveness. The BEGIN set here is equivalent to KILL in the standard - // formulation, and END is equivalent to GEN. The result of this computation - // is a map from blocks to bitvectors where the bitvectors represent which - // allocas are live in/out of that block. 
- SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(), - BasicBlockNumbering.end()); - unsigned NumSSMIters = 0; +void StackColoring::calculateLocalLiveness() +{ + unsigned NumIters = 0; bool changed = true; while (changed) { changed = false; - ++NumSSMIters; - - SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet; + ++NumIters; for (const MachineBasicBlock *BB : BasicBlockNumbering) { - if (!BBSet.count(BB)) continue; // Use an iterator to avoid repeated lookups. LivenessMap::iterator BI = BlockLiveness.find(BB); assert(BI != BlockLiveness.end() && "Block not found"); BlockLifetimeInfo &BlockInfo = BI->second; + // Compute LiveIn by unioning together the LiveOut sets of all preds. BitVector LocalLiveIn; - BitVector LocalLiveOut; - - // Forward propagation from begins to ends. for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), PE = BB->pred_end(); PI != PE; ++PI) { LivenessMap::const_iterator I = BlockLiveness.find(*PI); assert(I != BlockLiveness.end() && "Predecessor not found"); LocalLiveIn |= I->second.LiveOut; } - LocalLiveIn |= BlockInfo.End; - LocalLiveIn.reset(BlockInfo.Begin); - - // Reverse propagation from ends to begins. - for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - LivenessMap::const_iterator I = BlockLiveness.find(*SI); - assert(I != BlockLiveness.end() && "Successor not found"); - LocalLiveOut |= I->second.LiveIn; - } - LocalLiveOut |= BlockInfo.Begin; - LocalLiveOut.reset(BlockInfo.End); - - LocalLiveIn |= LocalLiveOut; - LocalLiveOut |= LocalLiveIn; - // After adopting the live bits, we need to turn-off the bits which - // are de-activated in this block. + // Compute LiveOut by subtracting out lifetimes that end in this + // block, then adding in lifetimes that begin in this block. If + // we have both BEGIN and END markers in the same basic block + // then we know that the BEGIN marker comes after the END, + // because we already handle the case where the BEGIN comes + // before the END when collecting the markers (and building the + // BEGIN/END vectors). + BitVector LocalLiveOut = LocalLiveIn; LocalLiveOut.reset(BlockInfo.End); - LocalLiveIn.reset(BlockInfo.Begin); - - // If we have both BEGIN and END markers in the same basic block then - // we know that the BEGIN marker comes after the END, because we already - // handle the case where the BEGIN comes before the END when collecting - // the markers (and building the BEGIN/END vectore). - // Want to enable the LIVE_IN and LIVE_OUT of slots that have both - // BEGIN and END because it means that the value lives before and after - // this basic block. - BitVector LocalEndBegin = BlockInfo.End; - LocalEndBegin &= BlockInfo.Begin; - LocalLiveIn |= LocalEndBegin; - LocalLiveOut |= LocalEndBegin; + LocalLiveOut |= BlockInfo.Begin; + // Update block LiveIn set, noting whether it has changed. if (LocalLiveIn.test(BlockInfo.LiveIn)) { changed = true; BlockInfo.LiveIn |= LocalLiveIn; - - NextBBSet.insert(BB->pred_begin(), BB->pred_end()); } + // Update block LiveOut set, noting whether it has changed. if (LocalLiveOut.test(BlockInfo.LiveOut)) { changed = true; BlockInfo.LiveOut |= LocalLiveOut; - - NextBBSet.insert(BB->succ_begin(), BB->succ_end()); } } - - BBSet = std::move(NextBBSet); }// while changed. 
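+
+  // The loop above is a standard forward dataflow fixed point over the
+  // reverse-post-order block numbering:
+  //   LiveIn[B]  = union of LiveOut[P] over all predecessors P of B
+  //   LiveOut[B] = (LiveIn[B] - End[B]) | Begin[B]
+  // Iteration stops once no block's LiveIn or LiveOut set grows.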
+ + NumIterations = NumIters; } void StackColoring::calculateLiveIntervals(unsigned NumSlots) { @@ -380,28 +686,22 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { Finishes.clear(); Finishes.resize(NumSlots); - // Create the interval for the basic blocks with lifetime markers in them. - for (const MachineInstr *MI : Markers) { - if (MI->getParent() != &MBB) - continue; - - assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || - MI->getOpcode() == TargetOpcode::LIFETIME_END) && - "Invalid Lifetime marker"); - - bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; - const MachineOperand &Mo = MI->getOperand(0); - int Slot = Mo.getIndex(); - assert(Slot >= 0 && "Invalid slot"); + // Create the interval for the basic blocks containing lifetime begin/end. + for (const MachineInstr &MI : MBB) { + SmallVector<int, 4> slots; + bool IsStart = false; + if (!isLifetimeStartOrEnd(MI, slots, IsStart)) + continue; SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); - - if (IsStart) { - if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) - Starts[Slot] = ThisIndex; - } else { - if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) - Finishes[Slot] = ThisIndex; + for (auto Slot : slots) { + if (IsStart) { + if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + Starts[Slot] = ThisIndex; + } else { + if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) + Finishes[Slot] = ThisIndex; + } } } @@ -417,7 +717,29 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { } for (unsigned i = 0; i < NumSlots; ++i) { - assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range"); + // + // When LifetimeStartOnFirstUse is turned on, data flow analysis + // is forward (from starts to ends), not bidirectional. A + // consequence of this is that we can wind up in situations + // where Starts[i] is invalid but Finishes[i] is valid and vice + // versa. Example: + // + // LIFETIME_START x + // if (...) { + // <use of x> + // throw ...; + // } + // LIFETIME_END x + // return 2; + // + // + // Here the slot for "x" will not be live into the block + // containing the "return 2" (since lifetimes start with first + // use, not at the dominating LIFETIME_START marker). + // + if (Starts[i].isValid() && !Finishes[i].isValid()) { + Finishes[i] = Indexes->getMBBEndIdx(&MBB); + } if (!Starts[i].isValid()) continue; @@ -495,10 +817,21 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // upcoming replacement. SP->adjustForColoring(From, To); + // The new alloca might not be valid in a llvm.dbg.declare for this + // variable, so undef out the use to make the verifier happy. + AllocaInst *FromAI = const_cast<AllocaInst *>(From); + if (FromAI->isUsedByMetadata()) + ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType())); + for (auto &Use : FromAI->uses()) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get())) + if (BCI->isUsedByMetadata()) + ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType())); + } + // Note that this will not replace uses in MMOs (which we'll update below), // or anywhere else (which is why we won't delete the original // instruction). - const_cast<AllocaInst *>(From)->replaceAllUsesWith(Inst); + FromAI->replaceAllUsesWith(Inst); } // Remap all instructions to the new stack slots. @@ -557,7 +890,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // If we *don't* protect the user from escaped allocas, don't bother // validating the instructions. 
if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { - SlotIndex Index = Indexes->getInstructionIndex(&I); + SlotIndex Index = Indexes->getInstructionIndex(I); const LiveInterval *Interval = &*Intervals[FromSlot]; assert(Interval->find(Index) != Interval->end() && "Found instruction usage outside of live range."); @@ -616,7 +949,7 @@ void StackColoring::removeInvalidSlotRanges() { // Check that the used slot is inside the calculated lifetime range. // If it is not, warn about it and invalidate the range. LiveInterval *Interval = &*Intervals[Slot]; - SlotIndex Index = Indexes->getInstructionIndex(&I); + SlotIndex Index = Indexes->getInstructionIndex(I); if (Interval->find(Index) == Interval->end()) { Interval->clear(); DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n"); @@ -643,9 +976,6 @@ void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, } bool StackColoring::runOnMachineFunction(MachineFunction &Func) { - if (skipOptnoneFunction(*Func.getFunction())) - return false; - DEBUG(dbgs() << "********** Stack Coloring **********\n" << "********** Function: " << ((const Value*)Func.getFunction())->getName() << '\n'); @@ -667,7 +997,6 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { return false; SmallVector<int, 8> SortedSlots; - SortedSlots.reserve(NumSlots); Intervals.reserve(NumSlots); @@ -686,7 +1015,8 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Don't continue because there are not enough lifetime markers, or the // stack is too small, or we are told not to optimize the slots. - if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) { + if (NumMarkers < 2 || TotalSize < 16 || DisableColoring || + skipFunction(*Func.getFunction())) { DEBUG(dbgs()<<"Will not try to merge slots.\n"); return removeAllMarkers(); } @@ -700,9 +1030,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Calculate the liveness of each block. calculateLocalLiveness(); + DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n"); + DEBUG(dump()); // Propagate the liveness information. calculateLiveIntervals(NumSlots); + DEBUG(dumpIntervals()); // Search for allocas which are used outside of the declared lifetime // markers. diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index 8550583..87e4eb6 100644 --- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -62,6 +62,11 @@ public: /// information we preserve. void getAnalysisUsage(AnalysisUsage &AU) const override; + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } + /// \brief Calculate the liveness information for the given machine function. bool runOnMachineFunction(MachineFunction &MF) override; @@ -122,7 +127,8 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { for (auto &MBB : MF) { DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n"); LiveRegs.init(TRI); - LiveRegs.addLiveOuts(&MBB); + // FIXME: This should probably be addLiveOuts(). + LiveRegs.addLiveOutsNoPristines(MBB); bool HasStackMap = false; // Reverse iterate over all instructions and add the current live register // set to an instruction if we encounter a patchpoint instruction. 
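For context, the backward walk described in that comment composes the two calls in this hunk: seed LivePhysRegs with the block's live-outs, then transfer liveness across one instruction at a time. A minimal sketch, assuming the pass's usual members (TRI) are in scope; it uses only the LivePhysRegs calls visible in this patch plus stepBackward(), and the stackmap bookkeeping is omitted:

    LivePhysRegs LiveRegs;
    LiveRegs.init(TRI);
    LiveRegs.addLiveOutsNoPristines(MBB);    // seed with the block's live-outs
    for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
      if (MI.getOpcode() == TargetOpcode::PATCHPOINT) {
        // LiveRegs currently holds the set live across this patchpoint;
        // recording it into the stackmap is pass-specific and omitted here.
      }
      LiveRegs.stepBackward(MI);             // now the live set before MI
    }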
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp index b3cd8b3..d91bb80 100644 --- a/contrib/llvm/lib/CodeGen/StackMaps.cpp +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -520,9 +520,9 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { void StackMaps::serializeToStackMapSection() { (void)WSMP; // Bail out if there's no stack map data. - assert((!CSInfos.empty() || (CSInfos.empty() && ConstPool.empty())) && + assert((!CSInfos.empty() || ConstPool.empty()) && "Expected empty constant pool too!"); - assert((!CSInfos.empty() || (CSInfos.empty() && FnStackSize.empty())) && + assert((!CSInfos.empty() || FnStackSize.empty()) && "Expected empty function record too!"); if (CSInfos.empty()) return; diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index db3fef5..89868e4 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -18,12 +18,13 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -89,15 +90,25 @@ bool StackProtector::runOnFunction(Function &Fn) { getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ? &DTWP->getDomTree() : nullptr; TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); + HasPrologue = false; + HasIRCheck = false; Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size"); if (Attr.isStringAttribute() && Attr.getValueAsString().getAsInteger(10, SSPBufferSize)) - return false; // Invalid integer string + return false; // Invalid integer string if (!RequiresStackProtector()) return false; + // TODO(etienneb): Functions with funclets are not correctly supported now. + // Do nothing if this is funclet-based personality. 
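+  // (At this revision, the funclet-based personalities include the MSVC C++
+  // and SEH personalities and CoreCLR; see isFuncletEHPersonality() in
+  // llvm/Analysis/EHPersonalities.h.)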
+ if (Fn.hasPersonalityFn()) { + EHPersonality Personality = classifyEHPersonality(Fn.getPersonalityFn()); + if (isFuncletEHPersonality(Personality)) + return false; + } + ++NumFunProtected; return InsertStackProtectors(); } @@ -200,11 +211,24 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { bool StackProtector::RequiresStackProtector() { bool Strong = false; bool NeedsProtector = false; + for (const BasicBlock &BB : *F) + for (const Instruction &I : BB) + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + if (CI->getCalledFunction() == + Intrinsic::getDeclaration(F->getParent(), + Intrinsic::stackprotector)) + HasPrologue = true; + + if (F->hasFnAttribute(Attribute::SafeStack)) + return false; + if (F->hasFnAttribute(Attribute::StackProtectReq)) { NeedsProtector = true; Strong = true; // Use the same heuristic as strong to determine SSPLayout } else if (F->hasFnAttribute(Attribute::StackProtectStrong)) Strong = true; + else if (HasPrologue) + NeedsProtector = true; else if (!F->hasFnAttribute(Attribute::StackProtect)) return false; @@ -256,106 +280,51 @@ bool StackProtector::RequiresStackProtector() { return NeedsProtector; } -static bool InstructionWillNotHaveChain(const Instruction *I) { - return !I->mayHaveSideEffects() && !I->mayReadFromMemory() && - isSafeToSpeculativelyExecute(I); -} - -/// Identify if RI has a previous instruction in the "Tail Position" and return -/// it. Otherwise return 0. -/// -/// This is based off of the code in llvm::isInTailCallPosition. The difference -/// is that it inverts the first part of llvm::isInTailCallPosition since -/// isInTailCallPosition is checking if a call is in a tail call position, and -/// we are searching for an unknown tail call that might be in the tail call -/// position. Once we find the call though, the code uses the same refactored -/// code, returnTypeIsEligibleForTailCall. -static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI, - const TargetLoweringBase *TLI) { - // Establish a reasonable upper bound on the maximum amount of instructions we - // will look through to find a tail call. - unsigned SearchCounter = 0; - const unsigned MaxSearch = 4; - bool NoInterposingChain = true; - - for (BasicBlock::reverse_iterator I = std::next(BB->rbegin()), E = BB->rend(); - I != E && SearchCounter < MaxSearch; ++I) { - Instruction *Inst = &*I; - - // Skip over debug intrinsics and do not allow them to affect our MaxSearch - // counter. - if (isa<DbgInfoIntrinsic>(Inst)) - continue; - - // If we find a call and the following conditions are satisifed, then we - // have found a tail call that satisfies at least the target independent - // requirements of a tail call: - // - // 1. The call site has the tail marker. - // - // 2. The call site either will not cause the creation of a chain or if a - // chain is necessary there are no instructions in between the callsite and - // the call which would create an interposing chain. - // - // 3. The return type of the function does not impede tail call - // optimization. - if (CallInst *CI = dyn_cast<CallInst>(Inst)) { - if (CI->isTailCall() && - (InstructionWillNotHaveChain(CI) || NoInterposingChain) && - returnTypeIsEligibleForTailCall(BB->getParent(), CI, RI, *TLI)) - return CI; - } - - // If we did not find a call see if we have an instruction that may create - // an interposing chain. - NoInterposingChain = - NoInterposingChain && InstructionWillNotHaveChain(Inst); - - // Increment max search. 
- SearchCounter++; - } - - return nullptr; +/// Create a stack guard loading and populate whether SelectionDAG SSP is +/// supported. +static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M, + IRBuilder<> &B, + bool *SupportsSelectionDAGSP = nullptr) { + if (Value *Guard = TLI->getIRStackGuard(B)) + return B.CreateLoad(Guard, true, "StackGuard"); + + // Use SelectionDAG SSP handling, since there isn't an IR guard. + // + // This is more or less weird, since we optionally output whether we + // should perform a SelectionDAG SP here. The reason is that it's strictly + // defined as !TLI->getIRStackGuard(B), where getIRStackGuard is also + // mutating. There is no way to get this bit without mutating the IR, so + // getting this bit has to happen in this right time. + // + // We could have define a new function TLI::supportsSelectionDAGSP(), but that + // will put more burden on the backends' overriding work, especially when it + // actually conveys the same information getIRStackGuard() already gives. + if (SupportsSelectionDAGSP) + *SupportsSelectionDAGSP = true; + TLI->insertSSPDeclarations(*M); + return B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackguard)); } -/// Insert code into the entry block that stores the __stack_chk_guard +/// Insert code into the entry block that stores the stack guard /// variable onto the stack: /// /// entry: /// StackGuardSlot = alloca i8* -/// StackGuard = load __stack_chk_guard -/// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) +/// StackGuard = <stack guard> +/// call void @llvm.stackprotector(StackGuard, StackGuardSlot) /// /// Returns true if the platform/triple supports the stackprotectorcreate pseudo /// node. static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, - const TargetLoweringBase *TLI, const Triple &TT, - AllocaInst *&AI, Value *&StackGuardVar) { + const TargetLoweringBase *TLI, AllocaInst *&AI) { bool SupportsSelectionDAGSP = false; - PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); - unsigned AddressSpace, Offset; - if (TLI->getStackCookieLocation(AddressSpace, Offset)) { - Constant *OffsetVal = - ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - - StackGuardVar = - ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy, - AddressSpace)); - } else if (TT.isOSOpenBSD()) { - StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); - cast<GlobalValue>(StackGuardVar) - ->setVisibility(GlobalValue::HiddenVisibility); - } else { - SupportsSelectionDAGSP = true; - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); - } - IRBuilder<> B(&F->getEntryBlock().front()); + PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot"); - LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard"); - B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), - {LI, AI}); + Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP); + B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), + {GuardSlot, AI}); return SupportsSelectionDAGSP; } @@ -366,11 +335,9 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, /// - The epilogue checks the value stored in the prologue against the original /// value. It calls __stack_chk_fail if they differ. 
bool StackProtector::InsertStackProtectors() {
- bool HasPrologue = false;
 bool SupportsSelectionDAGSP =
 EnableSelectionDAGSP && !TM->Options.EnableFastISel;
 AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
- Value *StackGuardVar = nullptr; // The stack guard variable.

 for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
 BasicBlock *BB = &*I++;
@@ -378,30 +345,36 @@ bool StackProtector::InsertStackProtectors() {
 if (!RI)
 continue;

+ // Generate prologue instrumentation if not already generated.
 if (!HasPrologue) {
 HasPrologue = true;
- SupportsSelectionDAGSP &=
- CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar);
+ SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI);
 }

- if (SupportsSelectionDAGSP) {
- // Since we have a potential tail call, insert the special stack check
- // intrinsic.
- Instruction *InsertionPt = nullptr;
- if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) {
- InsertionPt = CI;
- } else {
- InsertionPt = RI;
- // At this point we know that BB has a return statement so it *DOES*
- // have a terminator.
- assert(InsertionPt != nullptr &&
- "BB must have a terminator instruction at this point.");
- }
-
- Function *Intrinsic =
- Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck);
- CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt);
+ // SelectionDAG based code generation. Nothing else needs to be done here.
+ // The epilogue instrumentation is postponed to SelectionDAG.
+ if (SupportsSelectionDAGSP)
+ break;
+
+ // Set HasIRCheck to true, so that SelectionDAG will not generate its own
+ // version. SelectionDAG calls 'shouldEmitSDCheck' to check whether
+ // instrumentation has already been generated.
+ HasIRCheck = true;
+
+ // Generate epilogue instrumentation. The epilogue instrumentation can be
+ // function-based or inlined depending on which mechanism the target is
+ // providing.
+ if (Value* GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
+ // Generate the function-based epilogue instrumentation.
+ // The target provides a guard check function, generate a call to it.
+ IRBuilder<> B(RI);
+ LoadInst *Guard = B.CreateLoad(AI, true, "Guard");
+ CallInst *Call = B.CreateCall(GuardCheck, {Guard});
+ llvm::Function *Function = cast<llvm::Function>(GuardCheck);
+ Call->setAttributes(Function->getAttributes());
+ Call->setCallingConv(Function->getCallingConv());
 } else {
+ // Generate the epilogue with inline instrumentation.
 // If we do not support SelectionDAG based tail calls, generate IR level
 // tail calls.
 //
@@ -415,7 +388,7 @@ bool StackProtector::InsertStackProtectors() {
 //
 // return:
 // ...
- // %1 = load __stack_chk_guard
+ // %1 = <stack guard>
 // %2 = load StackGuardSlot
 // %3 = cmp i1 %1, %2
 // br i1 %3, label %SP_return, label %CallStackCheckFailBlk
@@ -450,9 +423,9 @@ bool StackProtector::InsertStackProtectors() {
 // Generate the stack protector instructions in the old basic block. 
IRBuilder<> B(BB); - LoadInst *LI1 = B.CreateLoad(StackGuardVar); - LoadInst *LI2 = B.CreateLoad(AI); - Value *Cmp = B.CreateICmpEQ(LI1, LI2); + Value *Guard = getStackGuard(TLI, M, B); + LoadInst *LI2 = B.CreateLoad(AI, true); + Value *Cmp = B.CreateICmpEQ(Guard, LI2); auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true); auto FailureProb = @@ -475,6 +448,7 @@ BasicBlock *StackProtector::CreateFailBB() { LLVMContext &Context = F->getContext(); BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F); IRBuilder<> B(FailBB); + B.SetCurrentDebugLocation(DebugLoc::get(0, 0, F->getSubprogram())); if (Trip.isOSOpenBSD()) { Constant *StackChkFail = M->getOrInsertFunction("__stack_smash_handler", @@ -491,3 +465,7 @@ BasicBlock *StackProtector::CreateFailBB() { B.CreateUnreachable(); return FailBB; } + +bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const { + return HasPrologue && !HasIRCheck && dyn_cast<ReturnInst>(BB.getTerminator()); +} diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index 51f4d0e..d996714 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -107,7 +107,7 @@ namespace { bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); bool ColorSlots(MachineFunction &MF); - void RewriteInstruction(MachineInstr *MI, SmallVectorImpl<int> &SlotMapping, + void RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping, MachineFunction &MF); bool RemoveDeadStores(MachineBasicBlock* MBB); }; @@ -145,9 +145,9 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { MachineBasicBlock *MBB = &*MBBI; for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); MII != EE; ++MII) { - MachineInstr *MI = &*MII; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + MachineInstr &MI = *MII; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isFI()) continue; int FI = MO.getIndex(); @@ -156,11 +156,12 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { if (!LS->hasInterval(FI)) continue; LiveInterval &li = LS->getInterval(FI); - if (!MI->isDebugValue()) + if (!MI.isDebugValue()) li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI); } - for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), - EE = MI->memoperands_end(); MMOI != EE; ++MMOI) { + for (MachineInstr::mmo_iterator MMOI = MI.memoperands_begin(), + EE = MI.memoperands_end(); + MMOI != EE; ++MMOI) { MachineMemOperand *MMO = *MMOI; if (const FixedStackPseudoSourceValue *FSV = dyn_cast_or_null<FixedStackPseudoSourceValue>( @@ -325,13 +326,10 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { } // Rewrite all MO_FrameIndex operands. Look for dead stores. - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = &*MBBI; - for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); - MII != EE; ++MII) - RewriteInstruction(MII, SlotMapping, MF); - RemoveDeadStores(MBB); + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) + RewriteInstruction(MI, SlotMapping, MF); + RemoveDeadStores(&MBB); } // Delete unused stack slots. 
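For reference, the dead-store cleanup in the following hunks keys on a pattern that becomes common once distinct slots are merged: a reload from a slot immediately followed by a store of the same register back to the same slot. A minimal sketch of the check, assuming I and NextMI are adjacent instructions in the block and using the by-reference queries shown below:

    int FirstSS = -1, SecondSS = -1;
    unsigned LoadReg = TII->isLoadFromStackSlot(*I, FirstSS);
    unsigned StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS);
    // A reload from slot N followed by a store of the same register back to
    // slot N leaves the slot unchanged: the store is dead, and the reload is
    // too if the stored register has no further uses.
    bool DeadPair = LoadReg && StoreReg && LoadReg == StoreReg &&
                    FirstSS == SecondSS && FirstSS != -1;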
@@ -346,12 +344,12 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { /// RewriteInstruction - Rewrite specified instruction by replacing references /// to old frame index with new one. -void StackSlotColoring::RewriteInstruction(MachineInstr *MI, +void StackSlotColoring::RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping, MachineFunction &MF) { // Update the operands. - for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, ee = MI.getNumOperands(); i != ee; ++i) { + MachineOperand &MO = MI.getOperand(i); if (!MO.isFI()) continue; int OldFI = MO.getIndex(); @@ -385,12 +383,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { break; int FirstSS, SecondSS; - if (TII->isStackSlotCopy(I, FirstSS, SecondSS) && - FirstSS == SecondSS && + if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS && FirstSS != -1) { ++NumDead; changed = true; - toErase.push_back(I); + toErase.push_back(&*I); continue; } @@ -399,8 +396,10 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { unsigned LoadReg = 0; unsigned StoreReg = 0; - if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; - if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue; + if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS))) + continue; + if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS))) + continue; if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue; ++NumDead; @@ -408,10 +407,10 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) { ++NumDead; - toErase.push_back(I); + toErase.push_back(&*I); } - toErase.push_back(NextMI); + toErase.push_back(&*NextMI); ++I; } diff --git a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp deleted file mode 100644 index 3f60e18..0000000 --- a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===-- StatepointDefaultGC.cpp - The default statepoint GC strategy ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a GCStrategy which serves as an example for the usage -// of a statepoint based lowering strategy. This GCStrategy is intended to -// suitable as a default implementation usable with any collector which can -// consume the standard stackmap format generated by statepoints, uses the -// default addrespace to distinguish between gc managed and non-gc managed -// pointers, and has reasonable relocation semantics. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/GCStrategy.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Value.h" - -using namespace llvm; - -namespace { -class StatepointGC : public GCStrategy { -public: - StatepointGC() { - UseStatepoints = true; - // These options are all gc.root specific, we specify them so that the - // gc.root lowering code doesn't run. - InitRoots = false; - NeededSafePoints = 0; - UsesMetadata = false; - CustomRoots = false; - } - Optional<bool> isGCManagedPointer(const Type *Ty) const override { - // Method is only valid on pointer typed values. 
- const PointerType *PT = cast<PointerType>(Ty); - // For the sake of this example GC, we arbitrarily pick addrspace(1) as our - // GC managed heap. We know that a pointer into this heap needs to be - // updated and that no other pointer does. Note that addrspace(1) is used - // only as an example, it has no special meaning, and is not reserved for - // GC usage. - return (1 == PT->getAddressSpace()); - } -}; -} - -static GCRegistry::Add<StatepointGC> X("statepoint-example", - "an example strategy for statepoint"); - -namespace llvm { -void linkStatepointExampleGC() {} -} diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index d2fbf53..2b1fb12 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -8,147 +8,52 @@ //===----------------------------------------------------------------------===// // // This pass duplicates basic blocks ending in unconditional branches into -// the tails of their predecessors. +// the tails of their predecessors, using the TailDuplicator utility class. // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSSAUpdater.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TailDuplicator.h" #include "llvm/IR/Function.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "tailduplication" -STATISTIC(NumTails , "Number of tails duplicated"); -STATISTIC(NumTailDups , "Number of tail duplicated blocks"); -STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); -STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); -STATISTIC(NumAddedPHIs , "Number of phis added"); - -// Heuristic for tail duplication. -static cl::opt<unsigned> -TailDuplicateSize("tail-dup-size", - cl::desc("Maximum instructions to consider tail duplicating"), - cl::init(2), cl::Hidden); - -static cl::opt<bool> -TailDupVerify("tail-dup-verify", - cl::desc("Verify sanity of PHI instructions during taildup"), - cl::init(false), cl::Hidden); - -static cl::opt<unsigned> -TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); - -typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; - namespace { - /// Perform tail duplication. - class TailDuplicatePass : public MachineFunctionPass { - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const MachineBranchProbabilityInfo *MBPI; - MachineModuleInfo *MMI; - MachineRegisterInfo *MRI; - std::unique_ptr<RegScavenger> RS; - bool PreRegAlloc; - - // A list of virtual registers for which to update SSA form. - SmallVector<unsigned, 16> SSAUpdateVRs; - - // For each virtual register in SSAUpdateVals keep a list of source virtual - // registers. 
- DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; +/// Perform tail duplication. Delegates to TailDuplicator +class TailDuplicatePass : public MachineFunctionPass { + TailDuplicator Duplicator; - public: - static char ID; - explicit TailDuplicatePass() : - MachineFunctionPass(ID), PreRegAlloc(false) {} +public: + static char ID; + explicit TailDuplicatePass() : MachineFunctionPass(ID) {} - bool runOnMachineFunction(MachineFunction &MF) override; + bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; - private: - void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, - MachineBasicBlock *BB); - void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, - MachineBasicBlock *PredBB, - DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVectorImpl<std::pair<unsigned,unsigned> > &Copies, - const DenseSet<unsigned> &UsedByPhi, - bool Remove); - void DuplicateInstruction(MachineInstr *MI, - MachineBasicBlock *TailBB, - MachineBasicBlock *PredBB, - MachineFunction &MF, - DenseMap<unsigned, unsigned> &LocalVRMap, - const DenseSet<unsigned> &UsedByPhi); - void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVectorImpl<MachineBasicBlock *> &TDBBs, - SmallSetVector<MachineBasicBlock*, 8> &Succs); - bool TailDuplicateBlocks(MachineFunction &MF); - bool shouldTailDuplicate(const MachineFunction &MF, - bool IsSimple, MachineBasicBlock &TailBB); - bool isSimpleBB(MachineBasicBlock *TailBB); - bool canCompletelyDuplicateBB(MachineBasicBlock &BB); - bool duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVectorImpl<MachineBasicBlock *> &TDBBs, - const DenseSet<unsigned> &RegsUsedByPhi, - SmallVectorImpl<MachineInstr *> &Copies); - bool TailDuplicate(MachineBasicBlock *TailBB, - bool IsSimple, - MachineFunction &MF, - SmallVectorImpl<MachineBasicBlock *> &TDBBs, - SmallVectorImpl<MachineInstr *> &Copies); - bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, - bool IsSimple, - MachineFunction &MF); - - void RemoveDeadBlock(MachineBasicBlock *MBB); - }; - - char TailDuplicatePass::ID = 0; +char TailDuplicatePass::ID = 0; } char &llvm::TailDuplicateID = TailDuplicatePass::ID; -INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", - false, false) +INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", false, + false) bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { - if (skipOptnoneFunction(*MF.getFunction())) + if (skipFunction(*MF.getFunction())) return false; - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); - MMI = getAnalysisIfAvailable<MachineModuleInfo>(); - MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + auto MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - PreRegAlloc = MRI->isSSA(); - RS.reset(); - if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) - RS.reset(new RegScavenger()); + Duplicator.initMF(MF, MMI, MBPI); bool MadeChange = false; - while (TailDuplicateBlocks(MF)) + while (Duplicator.tailDuplicateBlocks(MF)) MadeChange = true; return MadeChange; @@ -158,831 +63,3 @@ void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBranchProbabilityInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } - -static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { - for (MachineFunction::iterator I = ++MF.begin(), E 
= MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = &*I; - SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), - MBB->pred_end()); - MachineBasicBlock::iterator MI = MBB->begin(); - while (MI != MBB->end()) { - if (!MI->isPHI()) - break; - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - bool Found = false; - for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { - MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); - if (PHIBB == PredBB) { - Found = true; - break; - } - } - if (!Found) { - dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - dbgs() << " missing input from predecessor BB#" - << PredBB->getNumber() << '\n'; - llvm_unreachable(nullptr); - } - } - - for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { - MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); - if (CheckExtra && !Preds.count(PHIBB)) { - dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() - << ": " << *MI; - dbgs() << " extra input from predecessor BB#" - << PHIBB->getNumber() << '\n'; - llvm_unreachable(nullptr); - } - if (PHIBB->getNumber() < 0) { - dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; - dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; - llvm_unreachable(nullptr); - } - } - ++MI; - } - } -} - -/// Tail duplicate the block and cleanup. -bool -TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, - bool IsSimple, - MachineFunction &MF) { - // Save the successors list. - SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), - MBB->succ_end()); - - SmallVector<MachineBasicBlock*, 8> TDBBs; - SmallVector<MachineInstr*, 16> Copies; - if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies)) - return false; - - ++NumTails; - - SmallVector<MachineInstr*, 8> NewPHIs; - MachineSSAUpdater SSAUpdate(MF, &NewPHIs); - - // TailBB's immediate successors are now successors of those predecessors - // which duplicated TailBB. Add the predecessors as sources to the PHI - // instructions. - bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); - if (PreRegAlloc) - UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); - - // If it is dead, remove it. - if (isDead) { - NumInstrDups -= MBB->size(); - RemoveDeadBlock(MBB); - ++NumDeadBlocks; - } - - // Update SSA form. - if (!SSAUpdateVRs.empty()) { - for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { - unsigned VReg = SSAUpdateVRs[i]; - SSAUpdate.Initialize(VReg); - - // If the original definition is still around, add it as an available - // value. - MachineInstr *DefMI = MRI->getVRegDef(VReg); - MachineBasicBlock *DefBB = nullptr; - if (DefMI) { - DefBB = DefMI->getParent(); - SSAUpdate.AddAvailableValue(DefBB, VReg); - } - - // Add the new vregs as available values. - DenseMap<unsigned, AvailableValsTy>::iterator LI = - SSAUpdateVals.find(VReg); - for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { - MachineBasicBlock *SrcBB = LI->second[j].first; - unsigned SrcReg = LI->second[j].second; - SSAUpdate.AddAvailableValue(SrcBB, SrcReg); - } - - // Rewrite uses that are outside of the original def's block. - MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); - while (UI != MRI->use_end()) { - MachineOperand &UseMO = *UI; - MachineInstr *UseMI = UseMO.getParent(); - ++UI; - if (UseMI->isDebugValue()) { - // SSAUpdate can replace the use with an undef. That creates - // a debug instruction that is a kill. 
- // FIXME: Should it SSAUpdate job to delete debug instructions - // instead of replacing the use with undef? - UseMI->eraseFromParent(); - continue; - } - if (UseMI->getParent() == DefBB && !UseMI->isPHI()) - continue; - SSAUpdate.RewriteUse(UseMO); - } - } - - SSAUpdateVRs.clear(); - SSAUpdateVals.clear(); - } - - // Eliminate some of the copies inserted by tail duplication to maintain - // SSA form. - for (unsigned i = 0, e = Copies.size(); i != e; ++i) { - MachineInstr *Copy = Copies[i]; - if (!Copy->isCopy()) - continue; - unsigned Dst = Copy->getOperand(0).getReg(); - unsigned Src = Copy->getOperand(1).getReg(); - if (MRI->hasOneNonDBGUse(Src) && - MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { - // Copy is the only use. Do trivial copy propagation here. - MRI->replaceRegWith(Dst, Src); - Copy->eraseFromParent(); - } - } - - if (NewPHIs.size()) - NumAddedPHIs += NewPHIs.size(); - - return true; -} - -/// Look for small blocks that are unconditionally branched to and do not fall -/// through. Tail-duplicate their instructions into their predecessors to -/// eliminate (dynamic) branches. -bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { - bool MadeChange = false; - - if (PreRegAlloc && TailDupVerify) { - DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); - VerifyPHIs(MF, true); - } - - for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = &*I++; - - if (NumTails == TailDupLimit) - break; - - bool IsSimple = isSimpleBB(MBB); - - if (!shouldTailDuplicate(MF, IsSimple, *MBB)) - continue; - - MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF); - } - - if (PreRegAlloc && TailDupVerify) - VerifyPHIs(MF, false); - - return MadeChange; -} - -static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, - const MachineRegisterInfo *MRI) { - for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { - if (UseMI.isDebugValue()) - continue; - if (UseMI.getParent() != BB) - return true; - } - return false; -} - -static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { - for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) - if (MI->getOperand(i+1).getMBB() == SrcBB) - return i; - return 0; -} - - -// Remember which registers are used by phis in this block. This is -// used to determine which registers are liveout while modifying the -// block (which is why we need to copy the information). -static void getRegsUsedByPHIs(const MachineBasicBlock &BB, - DenseSet<unsigned> *UsedByPhi) { - for (const auto &MI : BB) { - if (!MI.isPHI()) - break; - for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { - unsigned SrcReg = MI.getOperand(i).getReg(); - UsedByPhi->insert(SrcReg); - } - } -} - -/// Add a definition and source virtual registers pair for SSA update. -void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, - MachineBasicBlock *BB) { - DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); - if (LI != SSAUpdateVals.end()) - LI->second.push_back(std::make_pair(BB, NewReg)); - else { - AvailableValsTy Vals; - Vals.push_back(std::make_pair(BB, NewReg)); - SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); - SSAUpdateVRs.push_back(OrigReg); - } -} - -/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the -/// source register that's contributed by PredBB and update SSA update map. 
-void TailDuplicatePass::ProcessPHI( - MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, - DenseMap<unsigned, unsigned> &LocalVRMap, - SmallVectorImpl<std::pair<unsigned, unsigned> > &Copies, - const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { - unsigned DefReg = MI->getOperand(0).getReg(); - unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); - assert(SrcOpIdx && "Unable to find matching PHI source?"); - unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); - const TargetRegisterClass *RC = MRI->getRegClass(DefReg); - LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); - - // Insert a copy from source to the end of the block. The def register is the - // available value liveout of the block. - unsigned NewDef = MRI->createVirtualRegister(RC); - Copies.push_back(std::make_pair(NewDef, SrcReg)); - if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) - AddSSAUpdateEntry(DefReg, NewDef, PredBB); - - if (!Remove) - return; - - // Remove PredBB from the PHI node. - MI->RemoveOperand(SrcOpIdx+1); - MI->RemoveOperand(SrcOpIdx); - if (MI->getNumOperands() == 1) - MI->eraseFromParent(); -} - -/// Duplicate a TailBB instruction to PredBB and update -/// the source operands due to earlier PHI translation. -void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, - MachineBasicBlock *TailBB, - MachineBasicBlock *PredBB, - MachineFunction &MF, - DenseMap<unsigned, unsigned> &LocalVRMap, - const DenseSet<unsigned> &UsedByPhi) { - MachineInstr *NewMI = TII->duplicate(MI, MF); - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (MO.isDef()) { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - unsigned NewReg = MRI->createVirtualRegister(RC); - MO.setReg(NewReg); - LocalVRMap.insert(std::make_pair(Reg, NewReg)); - if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) - AddSSAUpdateEntry(Reg, NewReg, PredBB); - } else { - DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); - if (VI != LocalVRMap.end()) { - MO.setReg(VI->second); - // Clear any kill flags from this operand. The new register could have - // uses after this one, so kills are not valid here. - MO.setIsKill(false); - MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg)); - } - } - } - PredBB->insert(PredBB->instr_end(), NewMI); -} - -/// After FromBB is tail duplicated into its predecessor blocks, the successors -/// have gained new predecessors. Update the PHI instructions in them -/// accordingly. -void -TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVectorImpl<MachineBasicBlock *> &TDBBs, - SmallSetVector<MachineBasicBlock*,8> &Succs) { - for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), - SE = Succs.end(); SI != SE; ++SI) { - MachineBasicBlock *SuccBB = *SI; - for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); - II != EE; ++II) { - if (!II->isPHI()) - break; - MachineInstrBuilder MIB(*FromBB->getParent(), II); - unsigned Idx = 0; - for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { - MachineOperand &MO = II->getOperand(i+1); - if (MO.getMBB() == FromBB) { - Idx = i; - break; - } - } - - assert(Idx != 0); - MachineOperand &MO0 = II->getOperand(Idx); - unsigned Reg = MO0.getReg(); - if (isDead) { - // Folded into the previous BB. - // There could be duplicate phi source entries. 
FIXME: Should sdisel - // or earlier pass fixed this? - for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) { - MachineOperand &MO = II->getOperand(i+1); - if (MO.getMBB() == FromBB) { - II->RemoveOperand(i+1); - II->RemoveOperand(i); - } - } - } else - Idx = 0; - - // If Idx is set, the operands at Idx and Idx+1 must be removed. - // We reuse the location to avoid expensive RemoveOperand calls. - - DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); - if (LI != SSAUpdateVals.end()) { - // This register is defined in the tail block. - for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { - MachineBasicBlock *SrcBB = LI->second[j].first; - // If we didn't duplicate a bb into a particular predecessor, we - // might still have added an entry to SSAUpdateVals to correcly - // recompute SSA. If that case, avoid adding a dummy extra argument - // this PHI. - if (!SrcBB->isSuccessor(SuccBB)) - continue; - - unsigned SrcReg = LI->second[j].second; - if (Idx != 0) { - II->getOperand(Idx).setReg(SrcReg); - II->getOperand(Idx+1).setMBB(SrcBB); - Idx = 0; - } else { - MIB.addReg(SrcReg).addMBB(SrcBB); - } - } - } else { - // Live in tail block, must also be live in predecessors. - for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { - MachineBasicBlock *SrcBB = TDBBs[j]; - if (Idx != 0) { - II->getOperand(Idx).setReg(Reg); - II->getOperand(Idx+1).setMBB(SrcBB); - Idx = 0; - } else { - MIB.addReg(Reg).addMBB(SrcBB); - } - } - } - if (Idx != 0) { - II->RemoveOperand(Idx+1); - II->RemoveOperand(Idx); - } - } - } -} - -/// Determine if it is profitable to duplicate this block. -bool -TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, - bool IsSimple, - MachineBasicBlock &TailBB) { - // Only duplicate blocks that end with unconditional branches. - if (TailBB.canFallThrough()) - return false; - - // Don't try to tail-duplicate single-block loops. - if (TailBB.isSuccessor(&TailBB)) - return false; - - // Set the limit on the cost to duplicate. When optimizing for size, - // duplicate only one, because one branch instruction can be eliminated to - // compensate for the duplication. - unsigned MaxDuplicateCount; - if (TailDuplicateSize.getNumOccurrences() == 0 && - // FIXME: Use Function::optForSize(). - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) - MaxDuplicateCount = 1; - else - MaxDuplicateCount = TailDuplicateSize; - - // If the target has hardware branch prediction that can handle indirect - // branches, duplicating them can often make them predictable when there - // are common paths through the code. The limit needs to be high enough - // to allow undoing the effects of tail merging and other optimizations - // that rearrange the predecessors of the indirect branch. - - bool HasIndirectbr = false; - if (!TailBB.empty()) - HasIndirectbr = TailBB.back().isIndirectBranch(); - - if (HasIndirectbr && PreRegAlloc) - MaxDuplicateCount = 20; - - // Check the instructions in the block to determine whether tail-duplication - // is invalid or unlikely to be profitable. - unsigned InstrCount = 0; - for (MachineInstr &MI : TailBB) { - // Non-duplicable things shouldn't be tail-duplicated. - if (MI.isNotDuplicable()) - return false; - - // Do not duplicate 'return' instructions if this is a pre-regalloc run. - // A return may expand into a lot more instructions (e.g. reload of callee - // saved registers) after PEI. - if (PreRegAlloc && MI.isReturn()) - return false; - - // Avoid duplicating calls before register allocation. 
Calls presents a - // barrier to register allocation so duplicating them may end up increasing - // spills. - if (PreRegAlloc && MI.isCall()) - return false; - - if (!MI.isPHI() && !MI.isDebugValue()) - InstrCount += 1; - - if (InstrCount > MaxDuplicateCount) - return false; - } - - // Check if any of the successors of TailBB has a PHI node in which the - // value corresponding to TailBB uses a subregister. - // If a phi node uses a register paired with a subregister, the actual - // "value type" of the phi may differ from the type of the register without - // any subregisters. Due to a bug, tail duplication may add a new operand - // without a necessary subregister, producing an invalid code. This is - // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. - // Disable tail duplication for this case for now, until the problem is - // fixed. - for (auto SB : TailBB.successors()) { - for (auto &I : *SB) { - if (!I.isPHI()) - break; - unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); - assert(Idx != 0); - MachineOperand &PU = I.getOperand(Idx); - if (PU.getSubReg() != 0) - return false; - } - } - - if (HasIndirectbr && PreRegAlloc) - return true; - - if (IsSimple) - return true; - - if (!PreRegAlloc) - return true; - - return canCompletelyDuplicateBB(TailBB); -} - -/// True if this BB has only one unconditional jump. -bool -TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { - if (TailBB->succ_size() != 1) - return false; - if (TailBB->pred_empty()) - return false; - MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(); - if (I == TailBB->end()) - return true; - return I->isUnconditionalBranch(); -} - -static bool -bothUsedInPHI(const MachineBasicBlock &A, - SmallPtrSet<MachineBasicBlock*, 8> SuccsB) { - for (MachineBasicBlock *BB : A.successors()) - if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) - return true; - - return false; -} - -bool -TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { - for (MachineBasicBlock *PredBB : BB.predecessors()) { - if (PredBB->succ_size() > 1) - return false; - - MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; - SmallVector<MachineOperand, 4> PredCond; - if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) - return false; - - if (!PredCond.empty()) - return false; - } - return true; -} - -bool -TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVectorImpl<MachineBasicBlock *> &TDBBs, - const DenseSet<unsigned> &UsedByPhi, - SmallVectorImpl<MachineInstr *> &Copies) { - SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), - TailBB->succ_end()); - SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), - TailBB->pred_end()); - bool Changed = false; - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - - if (PredBB->hasEHPadSuccessor()) - continue; - - if (bothUsedInPHI(*PredBB, Succs)) - continue; - - MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; - SmallVector<MachineOperand, 4> PredCond; - if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) - continue; - - Changed = true; - DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB - << "From simple Succ: " << *TailBB); - - MachineBasicBlock *NewTarget = *TailBB->succ_begin(); - MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator()); - - // Make PredFBB explicit. - if (PredCond.empty()) - PredFBB = PredTBB; - - // Make fall through explicit. 
- if (!PredTBB) - PredTBB = NextBB; - if (!PredFBB) - PredFBB = NextBB; - - // Redirect - if (PredFBB == TailBB) - PredFBB = NewTarget; - if (PredTBB == TailBB) - PredTBB = NewTarget; - - // Make the branch unconditional if possible - if (PredTBB == PredFBB) { - PredCond.clear(); - PredFBB = nullptr; - } - - // Avoid adding fall through branches. - if (PredFBB == NextBB) - PredFBB = nullptr; - if (PredTBB == NextBB && PredFBB == nullptr) - PredTBB = nullptr; - - TII->RemoveBranch(*PredBB); - - if (PredTBB) - TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); - - if (!PredBB->isSuccessor(NewTarget)) - PredBB->replaceSuccessor(TailBB, NewTarget); - else { - PredBB->removeSuccessor(TailBB, true); - assert(PredBB->succ_size() <= 1); - } - - TDBBs.push_back(PredBB); - } - return Changed; -} - -/// If it is profitable, duplicate TailBB's contents in each -/// of its predecessors. -bool -TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, - bool IsSimple, - MachineFunction &MF, - SmallVectorImpl<MachineBasicBlock *> &TDBBs, - SmallVectorImpl<MachineInstr *> &Copies) { - DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); - - DenseSet<unsigned> UsedByPhi; - getRegsUsedByPHIs(*TailBB, &UsedByPhi); - - if (IsSimple) - return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); - - // Iterate through all the unique predecessors and tail-duplicate this - // block into them, if possible. Copying the list ahead of time also - // avoids trouble with the predecessor list reallocating. - bool Changed = false; - SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), - TailBB->pred_end()); - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - - assert(TailBB != PredBB && - "Single-block loop should have been rejected earlier!"); - // EH edges are ignored by AnalyzeBranch. - if (PredBB->succ_size() > 1) - continue; - - MachineBasicBlock *PredTBB, *PredFBB; - SmallVector<MachineOperand, 4> PredCond; - if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) - continue; - if (!PredCond.empty()) - continue; - // Don't duplicate into a fall-through predecessor (at least for now). - if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) - continue; - - DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB - << "From Succ: " << *TailBB); - - TDBBs.push_back(PredBB); - - // Remove PredBB's unconditional branch. - TII->RemoveBranch(*PredBB); - - if (RS && !TailBB->livein_empty()) { - // Update PredBB livein. - RS->enterBasicBlock(PredBB); - if (!PredBB->empty()) - RS->forward(std::prev(PredBB->end())); - for (const auto &LI : TailBB->liveins()) { - if (!RS->isRegUsed(LI.PhysReg, false)) - // If a register is previously livein to the tail but it's not live - // at the end of predecessor BB, then it should be added to its - // livein list. - PredBB->addLiveIn(LI); - } - } - - // Clone the contents of TailBB into PredBB. - DenseMap<unsigned, unsigned> LocalVRMap; - SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; - // Use instr_iterator here to properly handle bundles, e.g. - // ARM Thumb2 IT block. - MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); - while (I != TailBB->instr_end()) { - MachineInstr *MI = &*I; - ++I; - if (MI->isPHI()) { - // Replace the uses of the def of the PHI with the register coming - // from PredBB. 
- ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); - } else { - // Replace def of virtual registers with new registers, and update - // uses with PHI source register or the new registers. - DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); - } - } - MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); - for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), - TII->get(TargetOpcode::COPY), - CopyInfos[i].first).addReg(CopyInfos[i].second)); - } - - // Simplify - TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); - - NumInstrDups += TailBB->size() - 1; // subtract one for removed branch - - // Update the CFG. - PredBB->removeSuccessor(PredBB->succ_begin()); - assert(PredBB->succ_empty() && - "TailDuplicate called on block with multiple successors!"); - for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), - E = TailBB->succ_end(); I != E; ++I) - PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); - - Changed = true; - ++NumTailDups; - } - - // If TailBB was duplicated into all its predecessors except for the prior - // block, which falls through unconditionally, move the contents of this - // block into the prior block. - MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); - MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; - SmallVector<MachineOperand, 4> PriorCond; - // This has to check PrevBB->succ_size() because EH edges are ignored by - // AnalyzeBranch. - if (PrevBB->succ_size() == 1 && - !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && - PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && - !TailBB->hasAddressTaken()) { - DEBUG(dbgs() << "\nMerging into block: " << *PrevBB - << "From MBB: " << *TailBB); - if (PreRegAlloc) { - DenseMap<unsigned, unsigned> LocalVRMap; - SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; - MachineBasicBlock::iterator I = TailBB->begin(); - // Process PHI instructions first. - while (I != TailBB->end() && I->isPHI()) { - // Replace the uses of the def of the PHI with the register coming - // from PredBB. - MachineInstr *MI = &*I++; - ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); - if (MI->getParent()) - MI->eraseFromParent(); - } - - // Now copy the non-PHI instructions. - while (I != TailBB->end()) { - // Replace def of virtual registers with new registers, and update - // uses with PHI source register or the new registers. - MachineInstr *MI = &*I++; - assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); - DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); - MI->eraseFromParent(); - } - MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); - for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), - TII->get(TargetOpcode::COPY), - CopyInfos[i].first) - .addReg(CopyInfos[i].second)); - } - } else { - // No PHIs to worry about, just splice the instructions over. - PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); - } - PrevBB->removeSuccessor(PrevBB->succ_begin()); - assert(PrevBB->succ_empty()); - PrevBB->transferSuccessors(TailBB); - TDBBs.push_back(PrevBB); - Changed = true; - } - - // If this is after register allocation, there are no phis to fix. - if (!PreRegAlloc) - return Changed; - - // If we made no changes so far, we are safe. 
- if (!Changed) - return Changed; - - - // Handle the nasty case in that we duplicated a block that is part of a loop - // into some but not all of its predecessors. For example: - // 1 -> 2 <-> 3 | - // \ | - // \---> rest | - // if we duplicate 2 into 1 but not into 3, we end up with - // 12 -> 3 <-> 2 -> rest | - // \ / | - // \----->-----/ | - // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced - // with a phi in 3 (which now dominates 2). - // What we do here is introduce a copy in 3 of the register defined by the - // phi, just like when we are duplicating 2 into 3, but we don't copy any - // real instructions or remove the 3 -> 2 edge from the phi in 2. - for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), - PE = Preds.end(); PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) - continue; - - // EH edges - if (PredBB->succ_size() != 1) - continue; - - DenseMap<unsigned, unsigned> LocalVRMap; - SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; - MachineBasicBlock::iterator I = TailBB->begin(); - // Process PHI instructions first. - while (I != TailBB->end() && I->isPHI()) { - // Replace the uses of the def of the PHI with the register coming - // from PredBB. - MachineInstr *MI = &*I++; - ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); - } - MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); - for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), - TII->get(TargetOpcode::COPY), - CopyInfos[i].first).addReg(CopyInfos[i].second)); - } - } - - return Changed; -} - -/// Remove the specified dead machine basic block from the function, updating -/// the CFG. -void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { - assert(MBB->pred_empty() && "MBB must be dead!"); - DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); - - // Remove all successors. - while (!MBB->succ_empty()) - MBB->removeSuccessor(MBB->succ_end()-1); - - // Remove the block. - MBB->eraseFromParent(); -} diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp new file mode 100644 index 0000000..847a093 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp @@ -0,0 +1,932 @@ +//===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This utility class duplicates basic blocks ending in unconditional branches +// into the tails of their predecessors. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "tailduplication" + +STATISTIC(NumTails, "Number of tails duplicated"); +STATISTIC(NumTailDups, "Number of tail duplicated blocks"); +STATISTIC(NumTailDupAdded, + "Number of instructions added due to tail duplication"); +STATISTIC(NumTailDupRemoved, + "Number of instructions removed due to tail duplication"); +STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); +STATISTIC(NumAddedPHIs, "Number of phis added"); + +// Heuristic for tail duplication. +static cl::opt<unsigned> TailDuplicateSize( + "tail-dup-size", + cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), + cl::Hidden); + +static cl::opt<bool> + TailDupVerify("tail-dup-verify", + cl::desc("Verify sanity of PHI instructions during taildup"), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), + cl::Hidden); + +namespace llvm { + +void TailDuplicator::initMF(MachineFunction &MF, const MachineModuleInfo *MMIin, + const MachineBranchProbabilityInfo *MBPIin) { + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + MMI = MMIin; + MBPI = MBPIin; + + assert(MBPI != nullptr && "Machine Branch Probability Info required"); + + PreRegAlloc = MRI->isSSA(); +} + +static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(), + MBB->pred_end()); + MachineBasicBlock::iterator MI = MBB->begin(); + while (MI != MBB->end()) { + if (!MI->isPHI()) + break; + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); + PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + bool Found = false; + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); + if (PHIBB == PredBB) { + Found = true; + break; + } + } + if (!Found) { + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " missing input from predecessor BB#" + << PredBB->getNumber() << '\n'; + llvm_unreachable(nullptr); + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); + if (CheckExtra && !Preds.count(PHIBB)) { + dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " + << *MI; + dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() + << '\n'; + llvm_unreachable(nullptr); + } + if (PHIBB->getNumber() < 0) { + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; + llvm_unreachable(nullptr); + } + } + ++MI; + } + } +} + +/// Tail duplicate the block and 
cleanup. +bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple, + MachineBasicBlock *MBB) { + // Save the successors list. + SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), + MBB->succ_end()); + + SmallVector<MachineBasicBlock *, 8> TDBBs; + SmallVector<MachineInstr *, 16> Copies; + if (!tailDuplicate(MF, IsSimple, MBB, TDBBs, Copies)) + return false; + + ++NumTails; + + SmallVector<MachineInstr *, 8> NewPHIs; + MachineSSAUpdater SSAUpdate(MF, &NewPHIs); + + // TailBB's immediate successors are now successors of those predecessors + // which duplicated TailBB. Add the predecessors as sources to the PHI + // instructions. + bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); + if (PreRegAlloc) + updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); + + // If it is dead, remove it. + if (isDead) { + NumTailDupRemoved += MBB->size(); + removeDeadBlock(MBB); + ++NumDeadBlocks; + } + + // Update SSA form. + if (!SSAUpdateVRs.empty()) { + for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { + unsigned VReg = SSAUpdateVRs[i]; + SSAUpdate.Initialize(VReg); + + // If the original definition is still around, add it as an available + // value. + MachineInstr *DefMI = MRI->getVRegDef(VReg); + MachineBasicBlock *DefBB = nullptr; + if (DefMI) { + DefBB = DefMI->getParent(); + SSAUpdate.AddAvailableValue(DefBB, VReg); + } + + // Add the new vregs as available values. + DenseMap<unsigned, AvailableValsTy>::iterator LI = + SSAUpdateVals.find(VReg); + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + SSAUpdate.AddAvailableValue(SrcBB, SrcReg); + } + + // Rewrite uses that are outside of the original def's block. + MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); + while (UI != MRI->use_end()) { + MachineOperand &UseMO = *UI; + MachineInstr *UseMI = UseMO.getParent(); + ++UI; + if (UseMI->isDebugValue()) { + // SSAUpdate can replace the use with an undef. That creates + // a debug instruction that is a kill. + // FIXME: Should it be SSAUpdate's job to delete debug instructions + // instead of replacing the use with undef? + UseMI->eraseFromParent(); + continue; + } + if (UseMI->getParent() == DefBB && !UseMI->isPHI()) + continue; + SSAUpdate.RewriteUse(UseMO); + } + } + + SSAUpdateVRs.clear(); + SSAUpdateVals.clear(); + } + + // Eliminate some of the copies inserted by tail duplication to maintain + // SSA form. + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + MachineInstr *Copy = Copies[i]; + if (!Copy->isCopy()) + continue; + unsigned Dst = Copy->getOperand(0).getReg(); + unsigned Src = Copy->getOperand(1).getReg(); + if (MRI->hasOneNonDBGUse(Src) && + MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { + // Copy is the only use. Do trivial copy propagation here. + MRI->replaceRegWith(Dst, Src); + Copy->eraseFromParent(); + } + } + + if (NewPHIs.size()) + NumAddedPHIs += NewPHIs.size(); + + return true; +} + +/// Look for small blocks that are unconditionally branched to and do not fall +/// through. Tail-duplicate their instructions into their predecessors to +/// eliminate (dynamic) branches.
+bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) { + bool MadeChange = false; + + if (PreRegAlloc && TailDupVerify) { + DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); + VerifyPHIs(MF, true); + } + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E;) { + MachineBasicBlock *MBB = &*I++; + + if (NumTails == TailDupLimit) + break; + + bool IsSimple = isSimpleBB(MBB); + + if (!shouldTailDuplicate(MF, IsSimple, *MBB)) + continue; + + MadeChange |= tailDuplicateAndUpdate(MF, IsSimple, MBB); + } + + if (PreRegAlloc && TailDupVerify) + VerifyPHIs(MF, false); + + return MadeChange; +} + +static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, + const MachineRegisterInfo *MRI) { + for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { + if (UseMI.isDebugValue()) + continue; + if (UseMI.getParent() != BB) + return true; + } + return false; +} + +static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) + if (MI->getOperand(i + 1).getMBB() == SrcBB) + return i; + return 0; +} + +// Remember which registers are used by phis in this block. This is +// used to determine which registers are liveout while modifying the +// block (which is why we need to copy the information). +static void getRegsUsedByPHIs(const MachineBasicBlock &BB, + DenseSet<unsigned> *UsedByPhi) { + for (const auto &MI : BB) { + if (!MI.isPHI()) + break; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { + unsigned SrcReg = MI.getOperand(i).getReg(); + UsedByPhi->insert(SrcReg); + } + } +} + +/// Add a definition and source virtual registers pair for SSA update. +void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB) { + DenseMap<unsigned, AvailableValsTy>::iterator LI = + SSAUpdateVals.find(OrigReg); + if (LI != SSAUpdateVals.end()) + LI->second.push_back(std::make_pair(BB, NewReg)); + else { + AvailableValsTy Vals; + Vals.push_back(std::make_pair(BB, NewReg)); + SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); + SSAUpdateVRs.push_back(OrigReg); + } +} + +/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the +/// source register that's contributed by PredBB and update SSA update map. +void TailDuplicator::processPHI( + MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, + DenseMap<unsigned, RegSubRegPair> &LocalVRMap, + SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies, + const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { + unsigned DefReg = MI->getOperand(0).getReg(); + unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); + assert(SrcOpIdx && "Unable to find matching PHI source?"); + unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg))); + + // Insert a copy from source to the end of the block. The def register is the + // available value liveout of the block. + unsigned NewDef = MRI->createVirtualRegister(RC); + Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg))); + if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) + addSSAUpdateEntry(DefReg, NewDef, PredBB); + + if (!Remove) + return; + + // Remove PredBB from the PHI node. 
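// (Illustrative note, not part of this patch.) A machine-level PHI lays out its operands as (def, reg0, bb0, reg1, bb1, ...), e.g. "%vreg2 = PHI %vreg0, <BB#1>, %vreg1, <BB#3>" with illustrative vreg/BB numbers, so the pair contributed by PredBB occupies SrcOpIdx and SrcOpIdx + 1. The block operand is removed first below so that SrcOpIdx stays valid, and a PHI reduced to just its def has no incoming values left and is erased.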
+ MI->RemoveOperand(SrcOpIdx + 1); + MI->RemoveOperand(SrcOpIdx); + if (MI->getNumOperands() == 1) + MI->eraseFromParent(); +} + +/// Duplicate a TailBB instruction to PredBB and update +/// the source operands due to earlier PHI translation. +void TailDuplicator::duplicateInstruction( + MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap<unsigned, RegSubRegPair> &LocalVRMap, + const DenseSet<unsigned> &UsedByPhi) { + MachineInstr *NewMI = TII->duplicate(*MI, MF); + if (PreRegAlloc) { + for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (MO.isDef()) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + unsigned NewReg = MRI->createVirtualRegister(RC); + MO.setReg(NewReg); + LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); + if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) + addSSAUpdateEntry(Reg, NewReg, PredBB); + } else { + auto VI = LocalVRMap.find(Reg); + if (VI != LocalVRMap.end()) { + // Need to make sure that the register class of the mapped register + // will satisfy the constraints of the class of the register being + // replaced. + auto *OrigRC = MRI->getRegClass(Reg); + auto *MappedRC = MRI->getRegClass(VI->second.Reg); + const TargetRegisterClass *ConstrRC; + if (VI->second.SubReg != 0) { + ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC, + VI->second.SubReg); + if (ConstrRC) { + // The actual constraining (as in "find appropriate new class") + // is done by getMatchingSuperRegClass, so now we only need to + // change the class of the mapped register. + MRI->setRegClass(VI->second.Reg, ConstrRC); + } + } else { + // For mapped registers that do not have sub-registers, simply + // restrict their class to match the original one. + ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC); + } + + if (ConstrRC) { + // If the class constraining succeeded, we can simply replace + // the old register with the mapped one. + MO.setReg(VI->second.Reg); + // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a + // sub-register, we need to compose the sub-register indices. + MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(), + VI->second.SubReg)); + } else { + // The direct replacement is not possible, due to failing register + // class constraints. An explicit COPY is necessary. Create one + // that can be reused + auto *NewRC = MI->getRegClassConstraint(i, TII, TRI); + if (NewRC == nullptr) + NewRC = OrigRC; + unsigned NewReg = MRI->createVirtualRegister(NewRC); + BuildMI(*PredBB, MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewReg) + .addReg(VI->second.Reg, 0, VI->second.SubReg); + LocalVRMap.erase(VI); + LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); + MO.setReg(NewReg); + // The composed VI.Reg:VI.SubReg is replaced with NewReg, which + // is equivalent to the whole register Reg. Hence, Reg:subreg + // is same as NewReg:subreg, so keep the sub-register index + // unchanged. + } + // Clear any kill flags from this operand. The new register could + // have uses after this one, so kills are not valid here. + MO.setIsKill(false); + } + } + } + } + PredBB->insert(PredBB->instr_end(), NewMI); +} + +/// After FromBB is tail duplicated into its predecessor blocks, the successors +/// have gained new predecessors. Update the PHI instructions in them +/// accordingly. 
+void TailDuplicator::updateSuccessorsPHIs( + MachineBasicBlock *FromBB, bool isDead, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallSetVector<MachineBasicBlock *, 8> &Succs) { + for (SmallSetVector<MachineBasicBlock *, 8>::iterator SI = Succs.begin(), + SE = Succs.end(); + SI != SE; ++SI) { + MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); + II != EE; ++II) { + if (!II->isPHI()) + break; + MachineInstrBuilder MIB(*FromBB->getParent(), II); + unsigned Idx = 0; + for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { + MachineOperand &MO = II->getOperand(i + 1); + if (MO.getMBB() == FromBB) { + Idx = i; + break; + } + } + + assert(Idx != 0); + MachineOperand &MO0 = II->getOperand(Idx); + unsigned Reg = MO0.getReg(); + if (isDead) { + // Folded into the previous BB. + // There could be duplicate phi source entries. FIXME: Should sdisel + // or an earlier pass have fixed this? + for (unsigned i = II->getNumOperands() - 2; i != Idx; i -= 2) { + MachineOperand &MO = II->getOperand(i + 1); + if (MO.getMBB() == FromBB) { + II->RemoveOperand(i + 1); + II->RemoveOperand(i); + } + } + } else + Idx = 0; + + // If Idx is set, the operands at Idx and Idx+1 must be removed. + // We reuse the location to avoid expensive RemoveOperand calls. + + DenseMap<unsigned, AvailableValsTy>::iterator LI = + SSAUpdateVals.find(Reg); + if (LI != SSAUpdateVals.end()) { + // This register is defined in the tail block. + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + // If we didn't duplicate a bb into a particular predecessor, we + // might still have added an entry to SSAUpdateVals to correctly + // recompute SSA. In that case, avoid adding a dummy extra argument to + // this PHI. + if (!SrcBB->isSuccessor(SuccBB)) + continue; + + unsigned SrcReg = LI->second[j].second; + if (Idx != 0) { + II->getOperand(Idx).setReg(SrcReg); + II->getOperand(Idx + 1).setMBB(SrcBB); + Idx = 0; + } else { + MIB.addReg(SrcReg).addMBB(SrcBB); + } + } + } else { + // Live in tail block, must also be live in predecessors. + for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = TDBBs[j]; + if (Idx != 0) { + II->getOperand(Idx).setReg(Reg); + II->getOperand(Idx + 1).setMBB(SrcBB); + Idx = 0; + } else { + MIB.addReg(Reg).addMBB(SrcBB); + } + } + } + if (Idx != 0) { + II->RemoveOperand(Idx + 1); + II->RemoveOperand(Idx); + } + } + } +} + +/// Determine if it is profitable to duplicate this block. +bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF, + bool IsSimple, + MachineBasicBlock &TailBB) { + // Only duplicate blocks that end with unconditional branches. + if (TailBB.canFallThrough()) + return false; + + // Don't try to tail-duplicate single-block loops. + if (TailBB.isSuccessor(&TailBB)) + return false; + + // Set the limit on the cost to duplicate. When optimizing for size, + // duplicate only one instruction, because one branch instruction can be + // eliminated to compensate for the duplication. + unsigned MaxDuplicateCount; + if (TailDuplicateSize.getNumOccurrences() == 0 && + // FIXME: Use Function::optForSize(). + MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) + MaxDuplicateCount = 1; + else + MaxDuplicateCount = TailDuplicateSize; + + // If the target has hardware branch prediction that can handle indirect + // branches, duplicating them can often make them predictable when there + // are common paths through the code.
The limit needs to be high enough + // to allow undoing the effects of tail merging and other optimizations + // that rearrange the predecessors of the indirect branch. + + bool HasIndirectbr = false; + if (!TailBB.empty()) + HasIndirectbr = TailBB.back().isIndirectBranch(); + + if (HasIndirectbr && PreRegAlloc) + MaxDuplicateCount = 20; + + // Check the instructions in the block to determine whether tail-duplication + // is invalid or unlikely to be profitable. + unsigned InstrCount = 0; + for (MachineInstr &MI : TailBB) { + // Non-duplicable things shouldn't be tail-duplicated. + if (MI.isNotDuplicable()) + return false; + + // Convergent instructions can be duplicated only if doing so doesn't add + // new control dependencies, which is exactly what tail duplication would + // do here. + if (MI.isConvergent()) + return false; + + // Do not duplicate 'return' instructions if this is a pre-regalloc run. + // A return may expand into a lot more instructions (e.g. reload of callee + // saved registers) after PEI. + if (PreRegAlloc && MI.isReturn()) + return false; + + // Avoid duplicating calls before register allocation. Calls present a + // barrier to register allocation, so duplicating them may end up increasing + // spills. + if (PreRegAlloc && MI.isCall()) + return false; + + if (!MI.isPHI() && !MI.isDebugValue()) + InstrCount += 1; + + if (InstrCount > MaxDuplicateCount) + return false; + } + + // Check if any of the successors of TailBB has a PHI node in which the + // value corresponding to TailBB uses a subregister. + // If a phi node uses a register paired with a subregister, the actual + // "value type" of the phi may differ from the type of the register without + // any subregisters. Due to a bug, tail duplication may add a new operand + // without the necessary subregister, producing invalid code. This is + // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. + // Disable tail duplication for this case for now, until the problem is + // fixed. + for (auto SB : TailBB.successors()) { + for (auto &I : *SB) { + if (!I.isPHI()) + break; + unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); + assert(Idx != 0); + MachineOperand &PU = I.getOperand(Idx); + if (PU.getSubReg() != 0) + return false; + } + } + + if (HasIndirectbr && PreRegAlloc) + return true; + + if (IsSimple) + return true; + + if (!PreRegAlloc) + return true; + + return canCompletelyDuplicateBB(TailBB); +} + +/// True if this BB has only one unconditional jump.
+bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) { + if (TailBB->succ_size() != 1) + return false; + if (TailBB->pred_empty()) + return false; + MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(); + if (I == TailBB->end()) + return true; + return I->isUnconditionalBranch(); +} + +static bool bothUsedInPHI(const MachineBasicBlock &A, + const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) { + for (MachineBasicBlock *BB : A.successors()) + if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) + return true; + + return false; +} + +bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { + for (MachineBasicBlock *PredBB : BB.predecessors()) { + if (PredBB->succ_size() > 1) + return false; + + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; + SmallVector<MachineOperand, 4> PredCond; + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + return false; + + if (!PredCond.empty()) + return false; + } + return true; +} + +bool TailDuplicator::duplicateSimpleBB( + MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, + const DenseSet<unsigned> &UsedByPhi, + SmallVectorImpl<MachineInstr *> &Copies) { + SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), + TailBB->succ_end()); + SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); + bool Changed = false; + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); + PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + if (PredBB->hasEHPadSuccessor()) + continue; + + if (bothUsedInPHI(*PredBB, Succs)) + continue; + + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; + SmallVector<MachineOperand, 4> PredCond; + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + + Changed = true; + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From simple Succ: " << *TailBB); + + MachineBasicBlock *NewTarget = *TailBB->succ_begin(); + MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator()); + + // Make PredFBB explicit. + if (PredCond.empty()) + PredFBB = PredTBB; + + // Make fall through explicit. + if (!PredTBB) + PredTBB = NextBB; + if (!PredFBB) + PredFBB = NextBB; + + // Redirect + if (PredFBB == TailBB) + PredFBB = NewTarget; + if (PredTBB == TailBB) + PredTBB = NewTarget; + + // Make the branch unconditional if possible + if (PredTBB == PredFBB) { + PredCond.clear(); + PredFBB = nullptr; + } + + // Avoid adding fall through branches. + if (PredFBB == NextBB) + PredFBB = nullptr; + if (PredTBB == NextBB && PredFBB == nullptr) + PredTBB = nullptr; + + TII->RemoveBranch(*PredBB); + + if (!PredBB->isSuccessor(NewTarget)) + PredBB->replaceSuccessor(TailBB, NewTarget); + else { + PredBB->removeSuccessor(TailBB, true); + assert(PredBB->succ_size() <= 1); + } + + if (PredTBB) + TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); + + TDBBs.push_back(PredBB); + } + return Changed; +} + +/// If it is profitable, duplicate TailBB's contents in each +/// of its predecessors. 
+bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, + MachineBasicBlock *TailBB, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies) { + DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + + DenseSet<unsigned> UsedByPhi; + getRegsUsedByPHIs(*TailBB, &UsedByPhi); + + if (IsSimple) + return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); + + // Iterate through all the unique predecessors and tail-duplicate this + // block into them, if possible. Copying the list ahead of time also + // avoids trouble with the predecessor list reallocating. + bool Changed = false; + SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); + PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + assert(TailBB != PredBB && + "Single-block loop should have been rejected earlier!"); + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() > 1) + continue; + + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + if (!PredCond.empty()) + continue; + // Don't duplicate into a fall-through predecessor (at least for now). + if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + continue; + + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From Succ: " << *TailBB); + + TDBBs.push_back(PredBB); + + // Remove PredBB's unconditional branch. + TII->RemoveBranch(*PredBB); + + // Clone the contents of TailBB into PredBB. + DenseMap<unsigned, RegSubRegPair> LocalVRMap; + SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; + // Use instr_iterator here to properly handle bundles, e.g. + // ARM Thumb2 IT block. + MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); + while (I != TailBB->instr_end()) { + MachineInstr *MI = &*I; + ++I; + if (MI->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); + } else { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); + } + } + appendCopies(PredBB, CopyInfos, Copies); + + // Simplify + TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + + NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch + + // Update the CFG. + PredBB->removeSuccessor(PredBB->succ_begin()); + assert(PredBB->succ_empty() && + "TailDuplicate called on block with multiple successors!"); + for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), + E = TailBB->succ_end(); + I != E; ++I) + PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); + + Changed = true; + ++NumTailDups; + } + + // If TailBB was duplicated into all its predecessors except for the prior + // block, which falls through unconditionally, move the contents of this + // block into the prior block. + MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); + MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; + SmallVector<MachineOperand, 4> PriorCond; + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. 
+ if (PrevBB->succ_size() == 1 && + !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && + PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && + !TailBB->hasAddressTaken()) { + DEBUG(dbgs() << "\nMerging into block: " << *PrevBB + << "From MBB: " << *TailBB); + if (PreRegAlloc) { + DenseMap<unsigned, RegSubRegPair> LocalVRMap; + SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); + } + + // Now copy the non-PHI instructions. + while (I != TailBB->end()) { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + MachineInstr *MI = &*I++; + assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); + duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); + MI->eraseFromParent(); + } + appendCopies(PrevBB, CopyInfos, Copies); + } else { + // No PHIs to worry about, just splice the instructions over. + PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); + } + PrevBB->removeSuccessor(PrevBB->succ_begin()); + assert(PrevBB->succ_empty()); + PrevBB->transferSuccessors(TailBB); + TDBBs.push_back(PrevBB); + Changed = true; + } + + // If this is after register allocation, there are no phis to fix. + if (!PreRegAlloc) + return Changed; + + // If we made no changes so far, we are safe. + if (!Changed) + return Changed; + + // Handle the nasty case in which we duplicated a block that is part of a loop + // into some but not all of its predecessors. For example: + // 1 -> 2 <-> 3 | + // \ | + // \---> rest | + // if we duplicate 2 into 1 but not into 3, we end up with + // 12 -> 3 <-> 2 -> rest | + // \ / | + // \----->-----/ | + // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced + // with a phi in 3 (which now dominates 2). + // What we do here is introduce a copy in 3 of the register defined by the + // phi, just like when we are duplicating 2 into 3, but we don't copy any + // real instructions or remove the 3 -> 2 edge from the phi in 2. + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); + PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) + continue; + + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() != 1) + continue; + + DenseMap<unsigned, RegSubRegPair> LocalVRMap; + SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); + } + appendCopies(PredBB, CopyInfos, Copies); + } + + return Changed; +} + +/// At the end of the block \p MBB generate COPY instructions between registers +/// described by \p CopyInfos. Append resulting instructions to \p Copies.
+void TailDuplicator::appendCopies(MachineBasicBlock *MBB, + SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos, + SmallVectorImpl<MachineInstr*> &Copies) { + MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); + const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY); + for (auto &CI : CopyInfos) { + auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first) + .addReg(CI.second.Reg, 0, CI.second.SubReg); + Copies.push_back(C); + } +} + +/// Remove the specified dead machine basic block from the function, updating +/// the CFG. +void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) { + assert(MBB->pred_empty() && "MBB must be dead!"); + DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); + + // Remove all successors. + while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_end() - 1); + + // Remove the block. + MBB->eraseFromParent(); +} + +} // End llvm namespace diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index 679ade1..cac7e63 100644 --- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -12,13 +12,14 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/BitVector.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <cstdlib> @@ -59,16 +60,25 @@ bool TargetFrameLowering::needsFrameIndexResolution( void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { - // Get the callee saved register list... const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + // Resize before the early returns. Some backends expect that + // SavedRegs.size() == TRI.getNumRegs() after this call even if there are no + // saved registers. + SavedRegs.resize(TRI.getNumRegs()); + + // When interprocedural register allocation is enabled caller saved registers + // are preferred over callee saved registers. + if (MF.getTarget().Options.EnableIPRA && isSafeForNoCSROpt(MF.getFunction())) + return; + + // Get the callee saved register list... const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); // Early exit if there are no callee saved registers. if (!CSRegs || CSRegs[0] == 0) return; - SavedRegs.resize(TRI.getNumRegs()); - // In Naked functions we aren't going to save any registers. if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) return; diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index 6eaf991..e7330c6 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include <cctype> + using namespace llvm; static cl::opt<bool> DisableHazardRecognizer( @@ -76,25 +77,27 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, /// may be overloaded in the target code to do that. 
unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, const MCAsmInfo &MAI) const { - - // Count the number of instructions in the asm. bool atInsnStart = true; - unsigned Length = 0; + unsigned InstCount = 0; for (; *Str; ++Str) { if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), - strlen(MAI.getSeparatorString())) == 0) + strlen(MAI.getSeparatorString())) == 0) { atInsnStart = true; - if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) { - Length += MAI.getMaxInstLength(); + } else if (strncmp(Str, MAI.getCommentString(), + strlen(MAI.getCommentString())) == 0) { + // Stop counting as an instruction after a comment until the next + // separator. atInsnStart = false; } - if (atInsnStart && strncmp(Str, MAI.getCommentString(), - strlen(MAI.getCommentString())) == 0) + + if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) { + ++InstCount; atInsnStart = false; + } } - return Length; + return InstCount * MAI.getMaxInstLength(); } /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything @@ -108,23 +111,24 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, while (!MBB->succ_empty()) MBB->removeSuccessor(MBB->succ_begin()); + // Save off the debug loc before erasing the instruction. + DebugLoc DL = Tail->getDebugLoc(); + // Remove all the dead instructions from the end of MBB. MBB->erase(Tail, MBB->end()); // If MBB isn't immediately before MBB, insert a branch to it. if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest)) - InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), - Tail->getDebugLoc()); + InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL); MBB->addSuccessor(NewDest); } -MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr *MI, - bool NewMI, - unsigned Idx1, +MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, + bool NewMI, unsigned Idx1, unsigned Idx2) const { - const MCInstrDesc &MCID = MI->getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); bool HasDef = MCID.getNumDefs(); - if (HasDef && !MI->getOperand(0).isReg()) + if (HasDef && !MI.getOperand(0).isReg()) // No idea how to commute this instruction. Target should implement its own. return nullptr; @@ -133,60 +137,62 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr *MI, assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) && CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 && "TargetInstrInfo::CommuteInstructionImpl(): not commutable operands."); - assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && + assert(MI.getOperand(Idx1).isReg() && MI.getOperand(Idx2).isReg() && "This only knows how to commute register operands so far"); - unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; - unsigned Reg1 = MI->getOperand(Idx1).getReg(); - unsigned Reg2 = MI->getOperand(Idx2).getReg(); - unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0; - unsigned SubReg1 = MI->getOperand(Idx1).getSubReg(); - unsigned SubReg2 = MI->getOperand(Idx2).getSubReg(); - bool Reg1IsKill = MI->getOperand(Idx1).isKill(); - bool Reg2IsKill = MI->getOperand(Idx2).isKill(); - bool Reg1IsUndef = MI->getOperand(Idx1).isUndef(); - bool Reg2IsUndef = MI->getOperand(Idx2).isUndef(); - bool Reg1IsInternal = MI->getOperand(Idx1).isInternalRead(); - bool Reg2IsInternal = MI->getOperand(Idx2).isInternalRead(); + unsigned Reg0 = HasDef ? 
MI.getOperand(0).getReg() : 0; + unsigned Reg1 = MI.getOperand(Idx1).getReg(); + unsigned Reg2 = MI.getOperand(Idx2).getReg(); + unsigned SubReg0 = HasDef ? MI.getOperand(0).getSubReg() : 0; + unsigned SubReg1 = MI.getOperand(Idx1).getSubReg(); + unsigned SubReg2 = MI.getOperand(Idx2).getSubReg(); + bool Reg1IsKill = MI.getOperand(Idx1).isKill(); + bool Reg2IsKill = MI.getOperand(Idx2).isKill(); + bool Reg1IsUndef = MI.getOperand(Idx1).isUndef(); + bool Reg2IsUndef = MI.getOperand(Idx2).isUndef(); + bool Reg1IsInternal = MI.getOperand(Idx1).isInternalRead(); + bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead(); // If destination is tied to either of the commuted source register, then // it must be updated. if (HasDef && Reg0 == Reg1 && - MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) { + MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) { Reg2IsKill = false; Reg0 = Reg2; SubReg0 = SubReg2; } else if (HasDef && Reg0 == Reg2 && - MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) { + MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) { Reg1IsKill = false; Reg0 = Reg1; SubReg0 = SubReg1; } + MachineInstr *CommutedMI = nullptr; if (NewMI) { // Create a new instruction. - MachineFunction &MF = *MI->getParent()->getParent(); - MI = MF.CloneMachineInstr(MI); + MachineFunction &MF = *MI.getParent()->getParent(); + CommutedMI = MF.CloneMachineInstr(&MI); + } else { + CommutedMI = &MI; } if (HasDef) { - MI->getOperand(0).setReg(Reg0); - MI->getOperand(0).setSubReg(SubReg0); + CommutedMI->getOperand(0).setReg(Reg0); + CommutedMI->getOperand(0).setSubReg(SubReg0); } - MI->getOperand(Idx2).setReg(Reg1); - MI->getOperand(Idx1).setReg(Reg2); - MI->getOperand(Idx2).setSubReg(SubReg1); - MI->getOperand(Idx1).setSubReg(SubReg2); - MI->getOperand(Idx2).setIsKill(Reg1IsKill); - MI->getOperand(Idx1).setIsKill(Reg2IsKill); - MI->getOperand(Idx2).setIsUndef(Reg1IsUndef); - MI->getOperand(Idx1).setIsUndef(Reg2IsUndef); - MI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal); - MI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); - return MI; + CommutedMI->getOperand(Idx2).setReg(Reg1); + CommutedMI->getOperand(Idx1).setReg(Reg2); + CommutedMI->getOperand(Idx2).setSubReg(SubReg1); + CommutedMI->getOperand(Idx1).setSubReg(SubReg2); + CommutedMI->getOperand(Idx2).setIsKill(Reg1IsKill); + CommutedMI->getOperand(Idx1).setIsKill(Reg2IsKill); + CommutedMI->getOperand(Idx2).setIsUndef(Reg1IsUndef); + CommutedMI->getOperand(Idx1).setIsUndef(Reg2IsUndef); + CommutedMI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal); + CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); + return CommutedMI; } -MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, - bool NewMI, +MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const { // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose @@ -194,7 +200,7 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, // called below. 
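// (Illustrative usage, not part of this patch.) Either index may be the sentinel TargetInstrInfo::CommuteAnyOperandIndex, in which case findCommutedOpIndices picks the concrete operands, e.g.: if (MachineInstr *Commuted = TII->commuteInstruction(MI, /*NewMI=*/false, TargetInstrInfo::CommuteAnyOperandIndex, TargetInstrInfo::CommuteAnyOperandIndex)) { /* with NewMI == false, MI was commuted in place and Commuted == &MI */ }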
if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) && !findCommutedOpIndices(MI, OpIdx1, OpIdx2)) { - assert(MI->isCommutable() && + assert(MI.isCommutable() && "Precondition violation: MI must be commutable."); return nullptr; } @@ -232,13 +238,13 @@ bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1, return true; } -bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, +bool TargetInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { - assert(!MI->isBundle() && + assert(!MI.isBundle() && "TargetInstrInfo::findCommutedOpIndices() can't handle bundles"); - const MCInstrDesc &MCID = MI->getDesc(); + const MCInstrDesc &MCID = MI.getDesc(); if (!MCID.isCommutable()) return false; @@ -250,39 +256,37 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, CommutableOpIdx1, CommutableOpIdx2)) return false; - if (!MI->getOperand(SrcOpIdx1).isReg() || - !MI->getOperand(SrcOpIdx2).isReg()) + if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg()) // No idea. return false; return true; } -bool -TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - if (!MI->isTerminator()) return false; +bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const { + if (!MI.isTerminator()) return false; // Conditional branch is a special case. - if (MI->isBranch() && !MI->isBarrier()) + if (MI.isBranch() && !MI.isBarrier()) return true; - if (!MI->isPredicable()) + if (!MI.isPredicable()) return true; return !isPredicated(MI); } bool TargetInstrInfo::PredicateInstruction( - MachineInstr *MI, ArrayRef<MachineOperand> Pred) const { + MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { bool MadeChange = false; - assert(!MI->isBundle() && + assert(!MI.isBundle() && "TargetInstrInfo::PredicateInstruction() can't handle bundles"); - const MCInstrDesc &MCID = MI->getDesc(); - if (!MI->isPredicable()) + const MCInstrDesc &MCID = MI.getDesc(); + if (!MI.isPredicable()) return false; - for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned j = 0, i = 0, e = MI.getNumOperands(); i != e; ++i) { if (MCID.OpInfo[i].isPredicate()) { - MachineOperand &MO = MI->getOperand(i); + MachineOperand &MO = MI.getOperand(i); if (MO.isReg()) { MO.setReg(Pred[j].getReg()); MadeChange = true; @@ -299,13 +303,12 @@ bool TargetInstrInfo::PredicateInstruction( return MadeChange; } -bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, +bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI, const MachineMemOperand *&MMO, int &FrameIndex) const { - for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), - oe = MI->memoperands_end(); - o != oe; - ++o) { + for (MachineInstr::mmo_iterator o = MI.memoperands_begin(), + oe = MI.memoperands_end(); + o != oe; ++o) { if ((*o)->isLoad()) { if (const FixedStackPseudoSourceValue *Value = dyn_cast_or_null<FixedStackPseudoSourceValue>( @@ -319,13 +322,12 @@ bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, return false; } -bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, +bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr &MI, const MachineMemOperand *&MMO, int &FrameIndex) const { - for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), - oe = MI->memoperands_end(); - o != oe; - ++o) { + for (MachineInstr::mmo_iterator o = MI.memoperands_begin(), + oe = MI.memoperands_end(); + o != oe; ++o) { if ((*o)->isStore()) { if (const 
FixedStackPseudoSourceValue *Value = dyn_cast_or_null<FixedStackPseudoSourceValue>( @@ -372,40 +374,37 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, - unsigned SubIdx, - const MachineInstr *Orig, + unsigned DestReg, unsigned SubIdx, + const MachineInstr &Orig, const TargetRegisterInfo &TRI) const { - MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig); MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI); MBB.insert(I, MI); } -bool -TargetInstrInfo::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1, - const MachineRegisterInfo *MRI) const { - return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); +bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0, + const MachineInstr &MI1, + const MachineRegisterInfo *MRI) const { + return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } -MachineInstr *TargetInstrInfo::duplicate(MachineInstr *Orig, +MachineInstr *TargetInstrInfo::duplicate(MachineInstr &Orig, MachineFunction &MF) const { - assert(!Orig->isNotDuplicable() && - "Instruction cannot be duplicated"); - return MF.CloneMachineInstr(Orig); + assert(!Orig.isNotDuplicable() && "Instruction cannot be duplicated"); + return MF.CloneMachineInstr(&Orig); } // If the COPY instruction in MI can be folded to a stack operation, return // the register class to use. -static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, +static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, unsigned FoldIdx) { - assert(MI->isCopy() && "MI must be a COPY instruction"); - if (MI->getNumOperands() != 2) + assert(MI.isCopy() && "MI must be a COPY instruction"); + if (MI.getNumOperands() != 2) return nullptr; assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand"); - const MachineOperand &FoldOp = MI->getOperand(FoldIdx); - const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx); + const MachineOperand &FoldOp = MI.getOperand(FoldIdx); + const MachineOperand &LiveOp = MI.getOperand(1 - FoldIdx); if (FoldOp.getSubReg() || LiveOp.getSubReg()) return nullptr; @@ -416,7 +415,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && "Cannot fold physregs"); - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) @@ -433,17 +432,17 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { llvm_unreachable("Not a MachO target"); } -static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, +static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, int FrameIndex, const TargetInstrInfo &TII) { unsigned StartIdx = 0; - switch (MI->getOpcode()) { + switch (MI.getOpcode()) { case TargetOpcode::STACKMAP: StartIdx = 2; // Skip ID, nShadowBytes. break; case TargetOpcode::PATCHPOINT: { // For PatchPoint, the call args are not foldable. 
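// (For orientation; simplified from LLVM's stackmap documentation, not part of this patch.) The operand layouts assumed here are roughly STACKMAP <id>, <numShadowBytes>, [live values...] and PATCHPOINT <id>, <numBytes>, <target>, <numArgs>, [call args...], [live values...], so StartIdx = 2 skips the stackmap header while getVarIdx() also skips a patchpoint's non-foldable call arguments.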
- PatchPointOpers opers(MI); + PatchPointOpers opers(&MI); StartIdx = opers.getVarIdx(); break; } @@ -459,15 +458,15 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, } MachineInstr *NewMI = - MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true); + MF.CreateMachineInstr(TII.get(MI.getOpcode()), MI.getDebugLoc(), true); MachineInstrBuilder MIB(MF, NewMI); // No need to fold return, the meta data, and function arguments for (unsigned i = 0; i < StartIdx; ++i) - MIB.addOperand(MI->getOperand(i)); + MIB.addOperand(MI.getOperand(i)); - for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) { - MachineOperand &MO = MI->getOperand(i); + for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) { + MachineOperand &MO = MI.getOperand(i); if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) { unsigned SpillSize; unsigned SpillOffset; @@ -495,35 +494,35 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, /// operand folded, otherwise NULL is returned. The client is responsible for /// removing the old instruction and adding the new one in the instruction /// stream. -MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - ArrayRef<unsigned> Ops, - int FI) const { - unsigned Flags = 0; +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, + ArrayRef<unsigned> Ops, int FI, + LiveIntervals *LIS) const { + auto Flags = MachineMemOperand::MONone; for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (MI->getOperand(Ops[i]).isDef()) + if (MI.getOperand(Ops[i]).isDef()) Flags |= MachineMemOperand::MOStore; else Flags |= MachineMemOperand::MOLoad; - MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "foldMemoryOperand needs an inserted instruction"); MachineFunction &MF = *MBB->getParent(); MachineInstr *NewMI = nullptr; - if (MI->getOpcode() == TargetOpcode::STACKMAP || - MI->getOpcode() == TargetOpcode::PATCHPOINT) { + if (MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::PATCHPOINT) { // Fold stackmap/patchpoint. NewMI = foldPatchpoint(MF, MI, Ops, FI, *this); if (NewMI) MBB->insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS); } if (NewMI) { - NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || NewMI->mayStore()) && @@ -542,14 +541,14 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, } // Straight COPY may fold as load/store. 
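// (Illustrative, not part of this patch.) For "%dst = COPY %src", folding the def (Ops == {0}) rewrites the copy as a store of %src into the stack slot, and folding the use (Ops == {1}) rewrites it as a reload of %dst from the slot; either way the surviving register operand is getOperand(1 - Ops[0]), as used below.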
- if (!MI->isCopy() || Ops.size() != 1) + if (!MI.isCopy() || Ops.size() != 1) return nullptr; const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); if (!RC) return nullptr; - const MachineOperand &MO = MI->getOperand(1-Ops[0]); + const MachineOperand &MO = MI.getOperand(1 - Ops[0]); MachineBasicBlock::iterator Pos = MI; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); @@ -557,7 +556,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); else loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); - return --Pos; + return &*--Pos; } bool TargetInstrInfo::hasReassociableOperands( @@ -637,7 +636,6 @@ bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst, bool TargetInstrInfo::getMachineCombinerPatterns( MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns) const { - bool Commute; if (isReassociationCandidate(Root, Commute)) { // We found a sequence of instructions that may be suitable for a @@ -656,7 +654,11 @@ bool TargetInstrInfo::getMachineCombinerPatterns( return false; } - +/// Return true when a code sequence can improve loop throughput. +bool +TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { + return false; +} /// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). void TargetInstrInfo::reassociateOps( @@ -768,75 +770,73 @@ void TargetInstrInfo::genAlternativeCodeSequence( assert(Prev && "Unknown pattern for machine combiner"); reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); - return; } /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. -MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, - MachineInstr *LoadMI) const { - assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); + MachineInstr &LoadMI, + LiveIntervals *LIS) const { + assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!"); #ifndef NDEBUG for (unsigned i = 0, e = Ops.size(); i != e; ++i) - assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); + assert(MI.getOperand(Ops[i]).isUse() && "Folding load into def!"); #endif - MachineBasicBlock &MBB = *MI->getParent(); + MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); // Ask the target to do the actual folding. MachineInstr *NewMI = nullptr; int FrameIndex = 0; - if ((MI->getOpcode() == TargetOpcode::STACKMAP || - MI->getOpcode() == TargetOpcode::PATCHPOINT) && + if ((MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::PATCHPOINT) && isLoadFromStackSlot(LoadMI, FrameIndex)) { // Fold stackmap/patchpoint. NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); if (NewMI) - NewMI = MBB.insert(MI, NewMI); + NewMI = &*MBB.insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS); } if (!NewMI) return nullptr; // Copy the memoperands from the load to the folded instruction. 
- if (MI->memoperands_empty()) { - NewMI->setMemRefs(LoadMI->memoperands_begin(), - LoadMI->memoperands_end()); + if (MI.memoperands_empty()) { + NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end()); } else { // Handle the rare case of folding multiple loads. - NewMI->setMemRefs(MI->memoperands_begin(), - MI->memoperands_end()); - for (MachineInstr::mmo_iterator I = LoadMI->memoperands_begin(), - E = LoadMI->memoperands_end(); I != E; ++I) { + NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(), + E = LoadMI.memoperands_end(); + I != E; ++I) { NewMI->addMemOperand(MF, *I); } } return NewMI; } -bool TargetInstrInfo:: -isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, - AliasAnalysis *AA) const { - const MachineFunction &MF = *MI->getParent()->getParent(); +bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( + const MachineInstr &MI, AliasAnalysis *AA) const { + const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); // Remat clients assume operand 0 is the defined register. - if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) + if (!MI.getNumOperands() || !MI.getOperand(0).isReg()) return false; - unsigned DefReg = MI->getOperand(0).getReg(); + unsigned DefReg = MI.getOperand(0).getReg(); // A sub-register definition can only be rematerialized if the instruction // doesn't read the other parts of the register. Otherwise it is really a // read-modify-write operation on the full virtual register which cannot be // moved safely. if (TargetRegisterInfo::isVirtualRegister(DefReg) && - MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg)) + MI.getOperand(0).getSubReg() && MI.readsVirtualRegister(DefReg)) return false; // A load from a fixed stack slot can be rematerialized. This may be @@ -848,23 +848,22 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return true; // Avoid instructions obviously unsafe for remat. - if (MI->isNotDuplicable() || MI->mayStore() || - MI->hasUnmodeledSideEffects()) + if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects()) return false; // Don't remat inline asm. We have no idea how expensive it is // even if it's side effect free. - if (MI->isInlineAsm()) + if (MI.isInlineAsm()) return false; // Avoid instructions which load from potentially varying memory. - if (MI->mayLoad() && !MI->isInvariantLoad(AA)) + if (MI.mayLoad() && !MI.isInvariantLoad(AA)) return false; // If any of the registers accessed are non-constant, conservatively assume // the instruction is not rematerializable. 
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) @@ -901,8 +900,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, return true; } -int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { - const MachineFunction *MF = MI->getParent()->getParent(); +int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const { + const MachineFunction *MF = MI.getParent()->getParent(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; @@ -910,15 +909,15 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { unsigned FrameSetupOpcode = getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode(); - if (MI->getOpcode() != FrameSetupOpcode && - MI->getOpcode() != FrameDestroyOpcode) + if (MI.getOpcode() != FrameSetupOpcode && + MI.getOpcode() != FrameDestroyOpcode) return 0; - - int SPAdj = MI->getOperand(0).getImm(); + + int SPAdj = MI.getOperand(0).getImm(); SPAdj = TFI->alignSPAdjust(SPAdj); - if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) || - (StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode)) + if ((!StackGrowsDown && MI.getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && MI.getOpcode() == FrameDestroyOpcode)) SPAdj = -SPAdj; return SPAdj; @@ -927,11 +926,11 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { /// isSchedulingBoundary - Test if the given instruction should be /// considered a scheduling boundary. This primarily includes labels /// and terminators. -bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, +bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const { // Terminators and labels can't be scheduled around. - if (MI->isTerminator() || MI->isPosition()) + if (MI.isTerminator() || MI.isPosition()) return true; // Don't attempt to schedule around any instruction that defines @@ -941,7 +940,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, // modification. const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - return MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI); + return MI.modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI); } // Provide a global flag for disabling the PreRA hazard recognizer that targets @@ -1010,13 +1009,12 @@ int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, // MachineInstr latency interface. 
//===----------------------------------------------------------------------===//
-unsigned
-TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
- const MachineInstr *MI) const {
+unsigned TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+ const MachineInstr &MI) const {
 if (!ItinData || ItinData->isEmpty())
 return 1;
- unsigned Class = MI->getDesc().getSchedClass();
+ unsigned Class = MI.getDesc().getSchedClass();
 int UOps = ItinData->Itineraries[Class].NumMicroOps;
 if (UOps >= 0)
 return UOps;
@@ -1028,60 +1026,59 @@ TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
 /// Return the default expected latency for a def based on its opcode.
 unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel &SchedModel,
- const MachineInstr *DefMI) const {
- if (DefMI->isTransient())
+ const MachineInstr &DefMI) const {
+ if (DefMI.isTransient())
 return 0;
- if (DefMI->mayLoad())
+ if (DefMI.mayLoad())
 return SchedModel.LoadLatency;
- if (isHighLatencyDef(DefMI->getOpcode()))
+ if (isHighLatencyDef(DefMI.getOpcode()))
 return SchedModel.HighLatency;
 return 1;
 }
-unsigned TargetInstrInfo::getPredicationCost(const MachineInstr *) const {
+unsigned TargetInstrInfo::getPredicationCost(const MachineInstr &) const {
 return 0;
 }
-unsigned TargetInstrInfo::
-getInstrLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI,
- unsigned *PredCost) const {
+unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &MI,
+ unsigned *PredCost) const {
 // Default to one cycle for no itinerary. However, an "empty" itinerary may
 // still have a MinLatency property, which getStageLatency checks.
 if (!ItinData)
- return MI->mayLoad() ? 2 : 1;
+ return MI.mayLoad() ? 2 : 1;
- return ItinData->getStageLatency(MI->getDesc().getSchedClass());
+ return ItinData->getStageLatency(MI.getDesc().getSchedClass());
 }
 bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
- const MachineInstr *DefMI,
+ const MachineInstr &DefMI,
 unsigned DefIdx) const {
 const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();
 if (!ItinData || ItinData->isEmpty())
 return false;
- unsigned DefClass = DefMI->getDesc().getSchedClass();
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
 int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
 return (DefCycle != -1 && DefCycle <= 1);
 }
 /// Both DefMI and UseMI must be valid. By default, call directly to the
 /// itinerary. This may be overridden by the target.
-int TargetInstrInfo::
-getOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- unsigned DefClass = DefMI->getDesc().getSchedClass();
- unsigned UseClass = UseMI->getDesc().getSchedClass();
+int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr &DefMI,
+ unsigned DefIdx,
+ const MachineInstr &UseMI,
+ unsigned UseIdx) const {
+ unsigned DefClass = DefMI.getDesc().getSchedClass();
+ unsigned UseClass = UseMI.getDesc().getSchedClass();
 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
 }
 /// If we can determine the operand latency from the def only, without itinerary
 /// lookup, do so. Otherwise return -1.
int TargetInstrInfo::computeDefOperandLatency( - const InstrItineraryData *ItinData, - const MachineInstr *DefMI) const { + const InstrItineraryData *ItinData, const MachineInstr &DefMI) const { // Let the target hook getInstrLatency handle missing itineraries. if (!ItinData) @@ -1094,21 +1091,9 @@ int TargetInstrInfo::computeDefOperandLatency( return -1; } -/// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use when the operand indices are already known. UseMI may -/// be NULL for an unknown use. -/// -/// FindMin may be set to get the minimum vs. expected latency. Minimum -/// latency is used for scheduling groups, while expected latency is for -/// instruction cost and critical path. -/// -/// Depending on the subtarget's itinerary properties, this may or may not need -/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or -/// UseIdx to compute min latency. -unsigned TargetInstrInfo:: -computeOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const { +unsigned TargetInstrInfo::computeOperandLatency( + const InstrItineraryData *ItinData, const MachineInstr &DefMI, + unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const { int DefLatency = computeDefOperandLatency(ItinData, DefMI); if (DefLatency >= 0) @@ -1118,9 +1103,9 @@ computeOperandLatency(const InstrItineraryData *ItinData, int OperLatency = 0; if (UseMI) - OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, *UseMI, UseIdx); else { - unsigned DefClass = DefMI->getDesc().getSchedClass(); + unsigned DefClass = DefMI.getDesc().getSchedClass(); OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); } if (OperLatency >= 0) diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp index 36a31c9..6d3fe8c 100644 --- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -43,6 +44,17 @@ static cl::opt<bool> JumpIsExpensiveOverride( cl::desc("Do not create extra branches to split comparison logic."), cl::Hidden); +// Although this default value is arbitrary, it is not random. It is assumed +// that a condition that evaluates the same way by a higher percentage than this +// is best represented as control flow. Therefore, the default value N should be +// set such that the win from N% correct executions is greater than the loss +// from (100 - N)% mispredicted executions for the majority of intended targets. +static cl::opt<int> MinPercentageForPredictableBranch( + "min-predictable-branch", cl::init(99), + cl::desc("Minimum percentage (0-100) that a condition must be either true " + "or false to assume that the condition is predictable"), + cl::Hidden); + /// InitLibcallNames - Set default libcall names. /// static void InitLibcallNames(const char **Names, const Triple &TT) { @@ -87,18 +99,6 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::UREM_I64] = "__umoddi3"; Names[RTLIB::UREM_I128] = "__umodti3"; - // These are generally not available. 
- Names[RTLIB::SDIVREM_I8] = nullptr; - Names[RTLIB::SDIVREM_I16] = nullptr; - Names[RTLIB::SDIVREM_I32] = nullptr; - Names[RTLIB::SDIVREM_I64] = nullptr; - Names[RTLIB::SDIVREM_I128] = nullptr; - Names[RTLIB::UDIVREM_I8] = nullptr; - Names[RTLIB::UDIVREM_I16] = nullptr; - Names[RTLIB::UDIVREM_I32] = nullptr; - Names[RTLIB::UDIVREM_I64] = nullptr; - Names[RTLIB::UDIVREM_I128] = nullptr; - Names[RTLIB::NEG_I32] = "__negsi2"; Names[RTLIB::NEG_I64] = "__negdi2"; Names[RTLIB::ADD_F32] = "__addsf3"; @@ -231,11 +231,21 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::COPYSIGN_F80] = "copysignl"; Names[RTLIB::COPYSIGN_F128] = "copysignl"; Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; + Names[RTLIB::FPEXT_F32_PPCF128] = "__gcc_stoq"; + Names[RTLIB::FPEXT_F64_PPCF128] = "__gcc_dtoq"; Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2"; Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2"; Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; - Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; - Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + if (TT.isOSDarwin()) { + // For f16/f32 conversions, Darwin uses the standard naming scheme, instead + // of the gnueabi-style __gnu_*_ieee. + // FIXME: What about other targets? + Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2"; + Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2"; + } else { + Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; + Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + } Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2"; Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2"; Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2"; @@ -243,10 +253,10 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; - Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__gcc_qtos"; Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; - Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__gcc_qtod"; Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; @@ -259,7 +269,7 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi"; Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi"; Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti"; - Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__gcc_qtou"; Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; @@ -281,7 +291,7 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; - Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__gcc_itoq"; Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; @@ -296,7 +306,7 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; - Names[RTLIB::UINTTOFP_I32_PPCF128] = 
"__floatunsitf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__gcc_utoq"; Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; @@ -310,27 +320,35 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::OEQ_F32] = "__eqsf2"; Names[RTLIB::OEQ_F64] = "__eqdf2"; Names[RTLIB::OEQ_F128] = "__eqtf2"; + Names[RTLIB::OEQ_PPCF128] = "__gcc_qeq"; Names[RTLIB::UNE_F32] = "__nesf2"; Names[RTLIB::UNE_F64] = "__nedf2"; Names[RTLIB::UNE_F128] = "__netf2"; + Names[RTLIB::UNE_PPCF128] = "__gcc_qne"; Names[RTLIB::OGE_F32] = "__gesf2"; Names[RTLIB::OGE_F64] = "__gedf2"; Names[RTLIB::OGE_F128] = "__getf2"; + Names[RTLIB::OGE_PPCF128] = "__gcc_qge"; Names[RTLIB::OLT_F32] = "__ltsf2"; Names[RTLIB::OLT_F64] = "__ltdf2"; Names[RTLIB::OLT_F128] = "__lttf2"; + Names[RTLIB::OLT_PPCF128] = "__gcc_qlt"; Names[RTLIB::OLE_F32] = "__lesf2"; Names[RTLIB::OLE_F64] = "__ledf2"; Names[RTLIB::OLE_F128] = "__letf2"; + Names[RTLIB::OLE_PPCF128] = "__gcc_qle"; Names[RTLIB::OGT_F32] = "__gtsf2"; Names[RTLIB::OGT_F64] = "__gtdf2"; Names[RTLIB::OGT_F128] = "__gttf2"; + Names[RTLIB::OGT_PPCF128] = "__gcc_qgt"; Names[RTLIB::UO_F32] = "__unordsf2"; Names[RTLIB::UO_F64] = "__unorddf2"; Names[RTLIB::UO_F128] = "__unordtf2"; + Names[RTLIB::UO_PPCF128] = "__gcc_qunord"; Names[RTLIB::O_F32] = "__unordsf2"; Names[RTLIB::O_F64] = "__unorddf2"; Names[RTLIB::O_F128] = "__unordtf2"; + Names[RTLIB::O_PPCF128] = "__gcc_qunord"; Names[RTLIB::MEMCPY] = "memcpy"; Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; @@ -395,36 +413,79 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4"; Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8"; Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16"; - - if (TT.getEnvironment() == Triple::GNU) { + + Names[RTLIB::ATOMIC_LOAD] = "__atomic_load"; + Names[RTLIB::ATOMIC_LOAD_1] = "__atomic_load_1"; + Names[RTLIB::ATOMIC_LOAD_2] = "__atomic_load_2"; + Names[RTLIB::ATOMIC_LOAD_4] = "__atomic_load_4"; + Names[RTLIB::ATOMIC_LOAD_8] = "__atomic_load_8"; + Names[RTLIB::ATOMIC_LOAD_16] = "__atomic_load_16"; + + Names[RTLIB::ATOMIC_STORE] = "__atomic_store"; + Names[RTLIB::ATOMIC_STORE_1] = "__atomic_store_1"; + Names[RTLIB::ATOMIC_STORE_2] = "__atomic_store_2"; + Names[RTLIB::ATOMIC_STORE_4] = "__atomic_store_4"; + Names[RTLIB::ATOMIC_STORE_8] = "__atomic_store_8"; + Names[RTLIB::ATOMIC_STORE_16] = "__atomic_store_16"; + + Names[RTLIB::ATOMIC_EXCHANGE] = "__atomic_exchange"; + Names[RTLIB::ATOMIC_EXCHANGE_1] = "__atomic_exchange_1"; + Names[RTLIB::ATOMIC_EXCHANGE_2] = "__atomic_exchange_2"; + Names[RTLIB::ATOMIC_EXCHANGE_4] = "__atomic_exchange_4"; + Names[RTLIB::ATOMIC_EXCHANGE_8] = "__atomic_exchange_8"; + Names[RTLIB::ATOMIC_EXCHANGE_16] = "__atomic_exchange_16"; + + Names[RTLIB::ATOMIC_COMPARE_EXCHANGE] = "__atomic_compare_exchange"; + Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_1] = "__atomic_compare_exchange_1"; + Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_2] = "__atomic_compare_exchange_2"; + Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_4] = "__atomic_compare_exchange_4"; + Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_8] = "__atomic_compare_exchange_8"; + Names[RTLIB::ATOMIC_COMPARE_EXCHANGE_16] = "__atomic_compare_exchange_16"; + + Names[RTLIB::ATOMIC_FETCH_ADD_1] = "__atomic_fetch_add_1"; + Names[RTLIB::ATOMIC_FETCH_ADD_2] = "__atomic_fetch_add_2"; + Names[RTLIB::ATOMIC_FETCH_ADD_4] = 
"__atomic_fetch_add_4"; + Names[RTLIB::ATOMIC_FETCH_ADD_8] = "__atomic_fetch_add_8"; + Names[RTLIB::ATOMIC_FETCH_ADD_16] = "__atomic_fetch_add_16"; + Names[RTLIB::ATOMIC_FETCH_SUB_1] = "__atomic_fetch_sub_1"; + Names[RTLIB::ATOMIC_FETCH_SUB_2] = "__atomic_fetch_sub_2"; + Names[RTLIB::ATOMIC_FETCH_SUB_4] = "__atomic_fetch_sub_4"; + Names[RTLIB::ATOMIC_FETCH_SUB_8] = "__atomic_fetch_sub_8"; + Names[RTLIB::ATOMIC_FETCH_SUB_16] = "__atomic_fetch_sub_16"; + Names[RTLIB::ATOMIC_FETCH_AND_1] = "__atomic_fetch_and_1"; + Names[RTLIB::ATOMIC_FETCH_AND_2] = "__atomic_fetch_and_2"; + Names[RTLIB::ATOMIC_FETCH_AND_4] = "__atomic_fetch_and_4"; + Names[RTLIB::ATOMIC_FETCH_AND_8] = "__atomic_fetch_and_8"; + Names[RTLIB::ATOMIC_FETCH_AND_16] = "__atomic_fetch_and_16"; + Names[RTLIB::ATOMIC_FETCH_OR_1] = "__atomic_fetch_or_1"; + Names[RTLIB::ATOMIC_FETCH_OR_2] = "__atomic_fetch_or_2"; + Names[RTLIB::ATOMIC_FETCH_OR_4] = "__atomic_fetch_or_4"; + Names[RTLIB::ATOMIC_FETCH_OR_8] = "__atomic_fetch_or_8"; + Names[RTLIB::ATOMIC_FETCH_OR_16] = "__atomic_fetch_or_16"; + Names[RTLIB::ATOMIC_FETCH_XOR_1] = "__atomic_fetch_xor_1"; + Names[RTLIB::ATOMIC_FETCH_XOR_2] = "__atomic_fetch_xor_2"; + Names[RTLIB::ATOMIC_FETCH_XOR_4] = "__atomic_fetch_xor_4"; + Names[RTLIB::ATOMIC_FETCH_XOR_8] = "__atomic_fetch_xor_8"; + Names[RTLIB::ATOMIC_FETCH_XOR_16] = "__atomic_fetch_xor_16"; + Names[RTLIB::ATOMIC_FETCH_NAND_1] = "__atomic_fetch_nand_1"; + Names[RTLIB::ATOMIC_FETCH_NAND_2] = "__atomic_fetch_nand_2"; + Names[RTLIB::ATOMIC_FETCH_NAND_4] = "__atomic_fetch_nand_4"; + Names[RTLIB::ATOMIC_FETCH_NAND_8] = "__atomic_fetch_nand_8"; + Names[RTLIB::ATOMIC_FETCH_NAND_16] = "__atomic_fetch_nand_16"; + + if (TT.isGNUEnvironment()) { Names[RTLIB::SINCOS_F32] = "sincosf"; Names[RTLIB::SINCOS_F64] = "sincos"; Names[RTLIB::SINCOS_F80] = "sincosl"; Names[RTLIB::SINCOS_F128] = "sincosl"; Names[RTLIB::SINCOS_PPCF128] = "sincosl"; - } else { - // These are generally not available. - Names[RTLIB::SINCOS_F32] = nullptr; - Names[RTLIB::SINCOS_F64] = nullptr; - Names[RTLIB::SINCOS_F80] = nullptr; - Names[RTLIB::SINCOS_F128] = nullptr; - Names[RTLIB::SINCOS_PPCF128] = nullptr; } if (!TT.isOSOpenBSD()) { Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; - } else { - // These are generally not available. - Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr; } - // For f16/f32 conversions, Darwin uses the standard naming scheme, instead - // of the gnueabi-style __gnu_*_ieee. - // FIXME: What about other targets? - if (TT.isOSDarwin()) { - Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2"; - Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2"; - } + Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize"; } /// InitLibcallCallingConvs - Set default libcall CallingConvs. 
@@ -446,9 +507,13 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { return FPEXT_F32_F64; if (RetVT == MVT::f128) return FPEXT_F32_F128; + if (RetVT == MVT::ppcf128) + return FPEXT_F32_PPCF128; } else if (OpVT == MVT::f64) { if (RetVT == MVT::f128) return FPEXT_F64_F128; + else if (RetVT == MVT::ppcf128) + return FPEXT_F64_PPCF128; } return UNKNOWN_LIBCALL; @@ -653,7 +718,7 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { return UNKNOWN_LIBCALL; } -RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) { +RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { #define OP_TO_LIBCALL(Name, Enum) \ case Name: \ switch (VT.SimpleTy) { \ @@ -698,27 +763,35 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { CCs[RTLIB::OEQ_F32] = ISD::SETEQ; CCs[RTLIB::OEQ_F64] = ISD::SETEQ; CCs[RTLIB::OEQ_F128] = ISD::SETEQ; + CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ; CCs[RTLIB::UNE_F32] = ISD::SETNE; CCs[RTLIB::UNE_F64] = ISD::SETNE; CCs[RTLIB::UNE_F128] = ISD::SETNE; + CCs[RTLIB::UNE_PPCF128] = ISD::SETNE; CCs[RTLIB::OGE_F32] = ISD::SETGE; CCs[RTLIB::OGE_F64] = ISD::SETGE; CCs[RTLIB::OGE_F128] = ISD::SETGE; + CCs[RTLIB::OGE_PPCF128] = ISD::SETGE; CCs[RTLIB::OLT_F32] = ISD::SETLT; CCs[RTLIB::OLT_F64] = ISD::SETLT; CCs[RTLIB::OLT_F128] = ISD::SETLT; + CCs[RTLIB::OLT_PPCF128] = ISD::SETLT; CCs[RTLIB::OLE_F32] = ISD::SETLE; CCs[RTLIB::OLE_F64] = ISD::SETLE; CCs[RTLIB::OLE_F128] = ISD::SETLE; + CCs[RTLIB::OLE_PPCF128] = ISD::SETLE; CCs[RTLIB::OGT_F32] = ISD::SETGT; CCs[RTLIB::OGT_F64] = ISD::SETGT; CCs[RTLIB::OGT_F128] = ISD::SETGT; + CCs[RTLIB::OGT_PPCF128] = ISD::SETGT; CCs[RTLIB::UO_F32] = ISD::SETNE; CCs[RTLIB::UO_F64] = ISD::SETNE; CCs[RTLIB::UO_F128] = ISD::SETNE; + CCs[RTLIB::UO_PPCF128] = ISD::SETNE; CCs[RTLIB::O_F32] = ISD::SETEQ; CCs[RTLIB::O_F64] = ISD::SETEQ; CCs[RTLIB::O_F128] = ISD::SETEQ; + CCs[RTLIB::O_PPCF128] = ISD::SETEQ; } /// NOTE: The TargetMachine owns TLOF. @@ -752,8 +825,14 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { PrefLoopAlignment = 0; GatherAllAliasesMaxDepth = 6; MinStackArgumentAlignment = 1; - InsertFencesForAtomic = false; MinimumJumpTableEntries = 4; + // TODO: the default will be switched to 0 in the next commit, along + // with the Target-specific changes necessary. + MaxAtomicSizeInBitsSupported = 1024; + + MinCmpXchgSizeInBits = 0; + + std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr); InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple()); InitCmpLibcallCCs(CmpLibcallCCs); @@ -767,8 +846,9 @@ void TargetLoweringBase::initActions() { memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); memset(CondCodeActions, 0, sizeof(CondCodeActions)); - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); - memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr); + std::fill(std::begin(TargetDAGCombineArray), + std::end(TargetDAGCombineArray), 0); // Set default actions for various operations. for (MVT VT : MVT::all_valuetypes()) { @@ -803,6 +883,10 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULO, VT, Expand); setOperationAction(ISD::UMULO, VT, Expand); + // These default to Expand so they will be expanded to CTLZ/CTTZ by default. 
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+
 setOperationAction(ISD::BITREVERSE, VT, Expand);
 // These library functions default to expand.
@@ -816,7 +900,7 @@ void TargetLoweringBase::initActions() {
 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
 }
- // For most targets @llvm.get.dynamic.area.offest just returns 0.
+ // For most targets @llvm.get.dynamic.area.offset just returns 0.
 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
 }
@@ -843,8 +927,6 @@ void TargetLoweringBase::initActions() {
 setOperationAction(ISD::FEXP , VT, Expand);
 setOperationAction(ISD::FEXP2, VT, Expand);
 setOperationAction(ISD::FFLOOR, VT, Expand);
- setOperationAction(ISD::FMINNUM, VT, Expand);
- setOperationAction(ISD::FMAXNUM, VT, Expand);
 setOperationAction(ISD::FNEARBYINT, VT, Expand);
 setOperationAction(ISD::FCEIL, VT, Expand);
 setOperationAction(ISD::FRINT, VT, Expand);
@@ -1090,9 +1172,10 @@ bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
 /// Replace/modify any TargetFrameIndex operands with a target-dependent
 /// sequence of memory operands that is recognized by PrologEpilogInserter.
-MachineBasicBlock*
-TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
+MachineBasicBlock *
+TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
 MachineBasicBlock *MBB) const {
+ MachineInstr *MI = &InitialMI;
 MachineFunction &MF = *MI->getParent()->getParent();
 MachineFrameInfo &MFI = *MF.getFrameInfo();
@@ -1151,7 +1234,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
 // Add a new memory operand for this FI.
 assert(MFI.getObjectOffset(FI) != -1);
- unsigned Flags = MachineMemOperand::MOLoad;
+ auto Flags = MachineMemOperand::MOLoad;
 if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
 Flags |= MachineMemOperand::MOStore;
 Flags |= MachineMemOperand::MOVolatile;
@@ -1250,10 +1333,17 @@ void TargetLoweringBase::computeRegisterProperties(
 // ppcf128 type is really two f64's.
 if (!isTypeLegal(MVT::ppcf128)) {
- NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
- RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
- TransformToType[MVT::ppcf128] = MVT::f64;
- ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ if (isTypeLegal(MVT::f64)) {
+ NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
+ RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
+ TransformToType[MVT::ppcf128] = MVT::f64;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
+ } else {
+ NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128];
+ RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128];
+ TransformToType[MVT::ppcf128] = MVT::i128;
+ ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat);
+ }
 }
 // Decide how to handle f128. If the target does not have native f128 support,
@@ -1308,13 +1398,12 @@ void TargetLoweringBase::computeRegisterProperties(
 case TypePromoteInteger: {
 // Try to promote the elements of integer vectors. If no legal
 // promotion was found, fall through to the widen-vector method.
- for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) {
 MVT SVT = (MVT::SimpleValueType) nVT;
 // Promote vectors of integers to vectors with the same number
 // of elements, with a wider element type.
- if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
- && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
- && SVT.getScalarType().isInteger()) {
+ if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() &&
+ SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) {
 TransformToType[i] = SVT;
 RegisterTypeForVT[i] = SVT;
 NumRegistersForVT[i] = 1;
@@ -1553,6 +1642,9 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
 return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
 }
+BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
+ return BranchProbability(MinPercentageForPredictableBranch, 100);
+}
 //===----------------------------------------------------------------------===//
 // TargetTransformInfo Helpers
@@ -1715,3 +1807,36 @@ bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
 return true;
 }
+
+//===----------------------------------------------------------------------===//
+// Stack Protector
+//===----------------------------------------------------------------------===//
+
+// For OpenBSD return its special guard variable. Otherwise return nullptr,
+// so that SelectionDAG handles SSP.
+Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
+ if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
+ Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
+ PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
+ auto Guard = cast<GlobalValue>(M.getOrInsertGlobal("__guard_local", PtrTy));
+ Guard->setVisibility(GlobalValue::HiddenVisibility);
+ return Guard;
+ }
+ return nullptr;
+}
+
+// Currently only support "standard" __stack_chk_guard.
+// TODO: add LOAD_STACK_GUARD support.
+void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
+ M.getOrInsertGlobal("__stack_chk_guard", Type::getInt8PtrTy(M.getContext()));
+}
+
+// Currently only support "standard" __stack_chk_guard.
+// TODO: add LOAD_STACK_GUARD support.
+Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
+ return M.getGlobalVariable("__stack_chk_guard", true);
+}
+
+Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
+ return nullptr;
+}
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 58ae9cc..5f814c9 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -33,6 +33,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCValue.h"
+#include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/COFF.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ELF.h"
@@ -68,11 +69,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
 cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData));
 Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
 Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
- StringRef Prefix = ".data.";
- NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end());
 unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
- MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS,
- Flags, 0, Label->getName());
+ MCSection *Sec = getContext().getELFNamedSection(".data", Label->getName(),
+ ELF::SHT_PROGBITS, Flags, 0);
 unsigned Size = DL.getPointerSize();
 Streamer.SwitchSection(Sec);
 Streamer.EmitValueToAlignment(DL.getPointerABIAlignment());
@@ -119,6 +118,10 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
 // section(".eh_frame") gcc will produce:
 //
 // .section .eh_frame,"a",@progbits
+
+ if (Name == getInstrProfCoverageSectionName(false))
+ return SectionKind::getMetadata();
+
 if (Name.empty() || Name[0] != '.') return K;
 // Some lame default implementation based on some magic section names.
@@ -259,9 +262,11 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
 EntrySize = 4;
 } else if (Kind.isMergeableConst8()) {
 EntrySize = 8;
- } else {
- assert(Kind.isMergeableConst16() && "unknown data width");
+ } else if (Kind.isMergeableConst16()) {
 EntrySize = 16;
+ } else {
+ assert(Kind.isMergeableConst32() && "unknown data width");
+ EntrySize = 32;
 }
 }
@@ -288,12 +293,14 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
 } else {
 Name = getSectionPrefixForGlobal(Kind);
 }
+ // FIXME: Extend the section prefix to include hotness categories such as .hot
+ // or .unlikely for functions.
 if (EmitUniqueSection && UniqueSectionNames) {
 Name.push_back('.');
 TM.getNameWithPrefix(Name, GV, Mang, true);
 }
- unsigned UniqueID = ~0;
+ unsigned UniqueID = MCContext::GenericSectionID;
 if (EmitUniqueSection && !UniqueSectionNames) {
 UniqueID = *NextUniqueID;
 (*NextUniqueID)++;
@@ -346,13 +353,16 @@ bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
 /// Given a mergeable constant with the specified size and relocation
 /// information, return a section that it should be placed in.
MCSection *TargetLoweringObjectFileELF::getSectionForConstant( - const DataLayout &DL, SectionKind Kind, const Constant *C) const { + const DataLayout &DL, SectionKind Kind, const Constant *C, + unsigned &Align) const { if (Kind.isMergeableConst4() && MergeableConst4Section) return MergeableConst4Section; if (Kind.isMergeableConst8() && MergeableConst8Section) return MergeableConst8Section; if (Kind.isMergeableConst16() && MergeableConst16Section) return MergeableConst16Section; + if (Kind.isMergeableConst32() && MergeableConst32Section) + return MergeableConst32Section; if (Kind.isReadOnly()) return ReadOnlySection; @@ -412,6 +422,27 @@ MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( KeySym); } +const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference( + const GlobalValue *LHS, const GlobalValue *RHS, Mangler &Mang, + const TargetMachine &TM) const { + // We may only use a PLT-relative relocation to refer to unnamed_addr + // functions. + if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy()) + return nullptr; + + // Basic sanity checks. + if (LHS->getType()->getPointerAddressSpace() != 0 || + RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() || + RHS->isThreadLocal()) + return nullptr; + + return MCBinaryExpr::createSub( + MCSymbolRefExpr::create(TM.getSymbol(LHS, Mang), PLTRelativeVariantKind, + getContext()), + MCSymbolRefExpr::create(TM.getSymbol(RHS, Mang), getContext()), + getContext()); +} + void TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { UseInitArray = UseInitArray_; @@ -443,10 +474,7 @@ emitModuleFlags(MCStreamer &Streamer, MDNode *LinkerOptions = nullptr; StringRef SectionVal; - for (ArrayRef<Module::ModuleFlagEntry>::iterator - i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { - const Module::ModuleFlagEntry &MFE = *i; - + for (const auto &MFE : ModuleFlags) { // Ignore flags with 'Require' behavior. if (MFE.Behavior == Module::Require) continue; @@ -459,6 +487,7 @@ emitModuleFlags(MCStreamer &Streamer, } else if (Key == "Objective-C Garbage Collection" || Key == "Objective-C GC Only" || Key == "Objective-C Is Simulated" || + Key == "Objective-C Class Properties" || Key == "Objective-C Image Swift Version") { ImageInfoFlags |= mdconst::extract<ConstantInt>(Val)->getZExtValue(); } else if (Key == "Objective-C Image Info Section") { @@ -470,16 +499,10 @@ emitModuleFlags(MCStreamer &Streamer, // Emit the linker options if present. if (LinkerOptions) { - for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { - MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); + for (const auto &Option : LinkerOptions->operands()) { SmallVector<std::string, 4> StrOptions; - - // Convert to strings. - for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { - MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); - StrOptions.push_back(MDOption->getString()); - } - + for (const auto &Piece : cast<MDNode>(Option)->operands()) + StrOptions.push_back(cast<MDString>(Piece)->getString()); Streamer.EmitLinkerOptions(StrOptions); } } @@ -630,7 +653,8 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( } MCSection *TargetLoweringObjectFileMachO::getSectionForConstant( - const DataLayout &DL, SectionKind Kind, const Constant *C) const { + const DataLayout &DL, SectionKind Kind, const Constant *C, + unsigned &Align) const { // If this constant requires a relocation, we have to put it in the data // segment, not in the text segment. 
if (Kind.isData() || Kind.isReadOnlyWithRel()) @@ -660,9 +684,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. - MachineModuleInfoImpl::StubValueTy &StubSym = - GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : - MachOMMI.getGVStubEntry(SSym); + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); if (!StubSym.getPointer()) { MCSymbol *Sym = TM.getSymbol(GV, Mang); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); @@ -785,8 +807,9 @@ void TargetLoweringObjectFileMachO::getNameWithPrefix( //===----------------------------------------------------------------------===// static unsigned -getCOFFSectionFlags(SectionKind K) { +getCOFFSectionFlags(SectionKind K, const TargetMachine &TM) { unsigned Flags = 0; + bool isThumb = TM.getTargetTriple().getArch() == Triple::thumb; if (K.isMetadata()) Flags |= @@ -795,7 +818,8 @@ getCOFFSectionFlags(SectionKind K) { Flags |= COFF::IMAGE_SCN_MEM_EXECUTE | COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_CNT_CODE; + COFF::IMAGE_SCN_CNT_CODE | + (isThumb ? COFF::IMAGE_SCN_MEM_16BIT : (COFF::SectionCharacteristics)0); else if (K.isBSS()) Flags |= COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | @@ -865,7 +889,7 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { int Selection = 0; - unsigned Characteristics = getCOFFSectionFlags(Kind); + unsigned Characteristics = getCOFFSectionFlags(Kind, TM); StringRef Name = GV->getSection(); StringRef COMDATSymName = ""; if (GV->hasComdat()) { @@ -884,10 +908,8 @@ MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( Selection = 0; } } - return getContext().getCOFFSection(Name, - Characteristics, - Kind, - COMDATSymName, + + return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, Selection); } @@ -916,7 +938,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) { const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); - unsigned Characteristics = getCOFFSectionFlags(Kind); + unsigned Characteristics = getCOFFSectionFlags(Kind, TM); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; int Selection = getSelectionForCOFF(GV); @@ -928,16 +950,20 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( else ComdatGV = GV; + unsigned UniqueID = MCContext::GenericSectionID; + if (EmitUniquedSection) + UniqueID = NextUniqueID++; + if (!ComdatGV->hasPrivateLinkage()) { MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); StringRef COMDATSymName = Sym->getName(); return getContext().getCOFFSection(Name, Characteristics, Kind, - COMDATSymName, Selection); + COMDATSymName, Selection, UniqueID); } else { SmallString<256> TmpData; Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true); return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, - Selection); + Selection, UniqueID); } } @@ -989,11 +1015,12 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( SectionKind Kind = SectionKind::getReadOnly(); const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); - unsigned Characteristics = getCOFFSectionFlags(Kind); + unsigned Characteristics = getCOFFSectionFlags(Kind, TM); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + unsigned UniqueID = NextUniqueID++; return 
getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, - COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE); + COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); } void TargetLoweringObjectFileCOFF:: @@ -1002,32 +1029,25 @@ emitModuleFlags(MCStreamer &Streamer, Mangler &Mang, const TargetMachine &TM) const { MDNode *LinkerOptions = nullptr; - // Look for the "Linker Options" flag, since it's the only one we support. - for (ArrayRef<Module::ModuleFlagEntry>::iterator - i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { - const Module::ModuleFlagEntry &MFE = *i; + for (const auto &MFE : ModuleFlags) { StringRef Key = MFE.Key->getString(); - Metadata *Val = MFE.Val; - if (Key == "Linker Options") { - LinkerOptions = cast<MDNode>(Val); - break; - } + if (Key == "Linker Options") + LinkerOptions = cast<MDNode>(MFE.Val); } - if (!LinkerOptions) - return; - // Emit the linker options to the linker .drectve section. According to the - // spec, this section is a space-separated string containing flags for linker. - MCSection *Sec = getDrectveSection(); - Streamer.SwitchSection(Sec); - for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { - MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); - for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { - MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); - // Lead with a space for consistency with our dllexport implementation. - std::string Directive(" "); - Directive.append(MDOption->getString()); - Streamer.EmitBytes(Directive); + if (LinkerOptions) { + // Emit the linker options to the linker .drectve section. According to the + // spec, this section is a space-separated string containing flags for + // linker. + MCSection *Sec = getDrectveSection(); + Streamer.SwitchSection(Sec); + for (const auto &Option : LinkerOptions->operands()) { + for (const auto &Piece : cast<MDNode>(Option)->operands()) { + // Lead with a space for consistency with our dllexport implementation. 
+ std::string Directive(" "); + Directive.append(cast<MDString>(Piece)->getString()); + Streamer.EmitBytes(Directive); + } } } } @@ -1035,13 +1055,13 @@ emitModuleFlags(MCStreamer &Streamer, MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getContext().getAssociativeCOFFSection( - cast<MCSectionCOFF>(StaticCtorSection), KeySym); + cast<MCSectionCOFF>(StaticCtorSection), KeySym, 0); } MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { return getContext().getAssociativeCOFFSection( - cast<MCSectionCOFF>(StaticDtorSection), KeySym); + cast<MCSectionCOFF>(StaticDtorSection), KeySym, 0); } void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp index 873f712..b8c8209 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1,4 +1,4 @@ -//===-- Passes.cpp - Target independent code generation passes ------------===// +//===-- TargetPassConfig.cpp - Target independent code generation passes --===// // // The LLVM Compiler Infrastructure // @@ -12,22 +12,26 @@ // //===---------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" + #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/CFLAndersAliasAnalysis.h" +#include "llvm/Analysis/CFLSteensAliasAnalysis.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" @@ -106,9 +110,19 @@ cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); -static cl::opt<bool> UseCFLAA("use-cfl-aa-in-codegen", - cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental CFL alias analysis in CodeGen")); +// Experimental option to use CFL-AA in codegen +enum class CFLAAType { None, Steensgaard, Andersen, Both }; +static cl::opt<CFLAAType> UseCFLAA( + "use-cfl-aa-in-codegen", cl::init(CFLAAType::None), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"), + cl::values(clEnumValN(CFLAAType::None, "none", "Disable CFL-AA"), + clEnumValN(CFLAAType::Steensgaard, "steens", + "Enable unification-based CFL-AA"), + clEnumValN(CFLAAType::Andersen, "anders", + "Enable inclusion-based CFL-AA"), + clEnumValN(CFLAAType::Both, "both", + "Enable both variants of CFL-AA"), + clEnumValEnd)); /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. 
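The UseCFLAA option above replaces the old boolean -use-cfl-aa-in-codegen flag with a four-way choice, and the switch added to addIRPasses() below maps each value to the alias-analysis wrapper passes that get scheduled (for "both", the Andersen variant is added first). Here is a self-contained C++ sketch of that mapping; the pass-name strings are illustrative stand-ins, not LLVM's registered pass identifiers.

#include <cstdio>
#include <string>
#include <vector>

enum class CFLAAType { None, Steensgaard, Andersen, Both };

// Which CFL-AA passes to schedule for each option value, mirroring the
// switch this patch adds to TargetPassConfig::addIRPasses().
static std::vector<std::string> cflPassesFor(CFLAAType T) {
  switch (T) {
  case CFLAAType::Steensgaard: return {"cfl-steens-aa"};
  case CFLAAType::Andersen:    return {"cfl-anders-aa"};
  case CFLAAType::Both:        return {"cfl-anders-aa", "cfl-steens-aa"};
  default:                     return {}; // None: no CFL-AA at all
  }
}

int main() {
  for (CFLAAType T : {CFLAAType::None, CFLAAType::Steensgaard,
                      CFLAAType::Andersen, CFLAAType::Both}) {
    std::printf("variant %d:", static_cast<int>(T));
    for (const std::string &P : cflPassesFor(T))
      std::printf(" %s", P.c_str());
    std::printf("\n");
  }
}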
@@ -241,7 +255,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr), StopAfter(nullptr), Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), - DisableVerify(false), EnableTailMerge(true) { + DisableVerify(false), EnableTailMerge(true) { Impl = new PassConfigImpl(); @@ -256,6 +270,13 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // Substitute Pseudo Pass IDs for real ones. substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); + + if (StringRef(PrintMachineInstrs.getValue()).equals("")) + TM->Options.PrintMachineCode = true; +} + +CodeGenOpt::Level TargetPassConfig::getOptLevel() const { + return TM->getOptLevel(); } /// Insert InsertedPassID pass after TargetPassID. @@ -303,6 +324,13 @@ IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const { return I->second; } +bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const { + IdentifyingPassPtr TargetID = getPassSubstitution(ID); + IdentifyingPassPtr FinalPtr = overridePass(ID, TargetID); + return !FinalPtr.isValid() || FinalPtr.isInstance() || + FinalPtr.getID() != ID; +} + /// Add a pass to the PassManager if that pass is supposed to be run. If the /// Started/Stopped flags indicate either that the compilation should start at /// a later pass or that it should stop after an earlier pass, then do not add @@ -392,12 +420,25 @@ void TargetPassConfig::addVerifyPass(const std::string &Banner) { /// Add common target configurable passes that perform LLVM IR to IR transforms /// following machine independent optimization. void TargetPassConfig::addIRPasses() { + switch (UseCFLAA) { + case CFLAAType::Steensgaard: + addPass(createCFLSteensAAWrapperPass()); + break; + case CFLAAType::Andersen: + addPass(createCFLAndersAAWrapperPass()); + break; + case CFLAAType::Both: + addPass(createCFLAndersAAWrapperPass()); + addPass(createCFLSteensAAWrapperPass()); + break; + default: + break; + } + // Basic AliasAnalysis support. // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. - if (UseCFLAA) - addPass(createCFLAAWrapperPass()); addPass(createTypeBasedAAWrapperPass()); addPass(createScopedNoAliasAAWrapperPass()); addPass(createBasicAAWrapperPass()); @@ -476,6 +517,10 @@ void TargetPassConfig::addCodeGenPrepare() { void TargetPassConfig::addISelPrepare() { addPreISel(); + // Force codegen to run according to the callgraph. + if (TM->Options.EnableIPRA) + addPass(new DummyCGSCCPass); + // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. addPass(createSafeStackPass(TM)); @@ -512,12 +557,12 @@ void TargetPassConfig::addISelPrepare() { void TargetPassConfig::addMachinePasses() { AddingMachinePasses = true; + if (TM->Options.EnableIPRA) + addPass(createRegUsageInfoPropPass()); + // Insert a machine instr printer pass after the specified pass. - // If -print-machineinstrs specified, print machineinstrs after all passes. 
- if (StringRef(PrintMachineInstrs.getValue()).equals("")) - TM->Options.PrintMachineCode = true; - else if (!StringRef(PrintMachineInstrs.getValue()) - .equals("option-unspecified")) { + if (!StringRef(PrintMachineInstrs.getValue()).equals("") && + !StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) { const PassRegistry *PR = PassRegistry::getPassRegistry(); const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue()); const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer")); @@ -556,10 +601,13 @@ void TargetPassConfig::addMachinePasses() { addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) addPass(&ShrinkWrapID); - addPass(&PrologEpilogCodeInserterID); + // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only + // do so if it hasn't been disabled, substituted, or overridden. + if (!isPassSubstitutedOrOverridden(&PrologEpilogCodeInserterID)) + addPass(createPrologEpilogInserterPass(TM)); /// Add passes that optimize machine instructions after register allocation. if (getOptLevel() != CodeGenOpt::None) @@ -597,11 +645,19 @@ void TargetPassConfig::addMachinePasses() { addPreEmitPass(); + if (TM->Options.EnableIPRA) + // Collect register usage information and produce a register mask of + // clobbered registers, to be used to optimize call sites. + addPass(createRegUsageInfoCollector()); + addPass(&FuncletLayoutID, false); addPass(&StackMapLivenessID, false); addPass(&LiveDebugValuesID, false); + addPass(&XRayInstrumentationID, false); + addPass(&PatchableFunctionID, false); + AddingMachinePasses = false; } @@ -661,6 +717,7 @@ MachinePassRegistry RegisterRegAlloc::Registry; /// A dummy default pass factory indicates whether the register allocator is /// overridden on the command line. +LLVM_DEFINE_ONCE_FLAG(InitializeDefaultRegisterAllocatorFlag); static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } static RegisterRegAlloc defaultRegAlloc("default", @@ -674,6 +731,15 @@ RegAlloc("regalloc", cl::init(&useDefaultRegisterAllocator), cl::desc("Register allocator to use")); +static void initializeDefaultRegisterAllocatorOnce() { + RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); + + if (!Ctor) { + Ctor = RegAlloc; + RegisterRegAlloc::setDefault(RegAlloc); + } +} + /// Instantiate the default register allocator pass for this target for either /// the optimized or unoptimized allocation path. This will be added to the pass @@ -700,13 +766,11 @@ FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) { /// FIXME: When MachinePassRegistry register pass IDs instead of function ptrs, /// this can be folded into addPass. FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { - RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); - // Initialize the global default. - if (!Ctor) { - Ctor = RegAlloc; - RegisterRegAlloc::setDefault(RegAlloc); - } + llvm::call_once(InitializeDefaultRegisterAllocatorFlag, + initializeDefaultRegisterAllocatorOnce); + + RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); if (Ctor != useDefaultRegisterAllocator) return Ctor(); @@ -734,6 +798,8 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { /// optimized register allocation, including coalescing, machine instruction /// scheduling, and register allocation itself. 
void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + addPass(&DetectDeadLanesID, false); + addPass(&ProcessImplicitDefsID, false); // LiveVariables currently requires pure SSA form. @@ -755,6 +821,11 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { addPass(&TwoAddressInstructionPassID, false); addPass(&RegisterCoalescerID); + // The machine scheduler may accidentally create disconnected components + // when moving subregister definitions around, avoid this by splitting them to + // separate vregs before. Splitting can also improve reg. allocation quality. + addPass(&RenameIndependentSubregsID); + // PreRA instruction scheduling. addPass(&MachineSchedulerID); @@ -809,7 +880,7 @@ bool TargetPassConfig::addGCPasses() { /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { - if (addPass(&MachineBlockPlacementID, false)) { + if (addPass(&MachineBlockPlacementID)) { // Run a separate pass to collect block placement statistics. if (EnableBlockPlacementStats) addPass(&MachineBlockPlacementStatsID); diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 0a7042a..e1d90cb 100644 --- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -112,18 +112,11 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { if (!RC || RC->isAllocatable()) return RC; - const unsigned *SubClass = RC->getSubClassMask(); - for (unsigned Base = 0, BaseE = getNumRegClasses(); - Base < BaseE; Base += 32) { - unsigned Idx = Base; - for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) { - unsigned Offset = countTrailingZeros(Mask); - const TargetRegisterClass *SubRC = getRegClass(Idx + Offset); - if (SubRC->isAllocatable()) - return SubRC; - Mask >>= Offset; - Idx += Offset + 1; - } + for (BitMaskClassIterator It(RC->getSubClassMask(), *this); It.isValid(); + ++It) { + const TargetRegisterClass *SubRC = getRegClass(It.getID()); + if (SubRC->isAllocatable()) + return SubRC; } return nullptr; } @@ -388,6 +381,15 @@ bool TargetRegisterInfo::needsStackRealignment( return false; } +bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0, + const uint32_t *mask1) const { + unsigned N = (getNumRegs()+31) / 32; + for (unsigned I = 0; I < N; ++I) + if ((mask0[I] & mask1[I]) != mask0[I]) + return false; + return true; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp index 1c4558c..022e912 100644 --- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -77,7 +77,7 @@ unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC) const { if (hasInstrItineraries()) { int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); - return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI); + return (UOps >= 0) ? 
UOps : TII->getNumMicroOps(&InstrItins, *MI); } if (hasInstrSchedModel()) { if (!SC) @@ -156,13 +156,13 @@ unsigned TargetSchedModel::computeOperandLatency( const MachineInstr *UseMI, unsigned UseOperIdx) const { if (!hasInstrSchedModel() && !hasInstrItineraries()) - return TII->defaultDefLatency(SchedModel, DefMI); + return TII->defaultDefLatency(SchedModel, *DefMI); if (hasInstrItineraries()) { int OperLatency = 0; if (UseMI) { - OperLatency = TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, - UseMI, UseOperIdx); + OperLatency = TII->getOperandLatency(&InstrItins, *DefMI, DefOperIdx, + *UseMI, UseOperIdx); } else { unsigned DefClass = DefMI->getDesc().getSchedClass(); @@ -172,15 +172,15 @@ unsigned TargetSchedModel::computeOperandLatency( return OperLatency; // No operand latency was found. - unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI); + unsigned InstrLatency = TII->getInstrLatency(&InstrItins, *DefMI); // Expected latency is the max of the stage latency and itinerary props. // Rather than directly querying InstrItins stage latency, we call a TII // hook to allow subtargets to specialize latency. This hook is only // applicable to the InstrItins model. InstrSchedModel should model all // special cases without TII hooks. - InstrLatency = std::max(InstrLatency, - TII->defaultDefLatency(SchedModel, DefMI)); + InstrLatency = + std::max(InstrLatency, TII->defaultDefLatency(SchedModel, *DefMI)); return InstrLatency; } // hasInstrSchedModel() @@ -219,7 +219,7 @@ unsigned TargetSchedModel::computeOperandLatency( // FIXME: Automatically giving all implicit defs defaultDefLatency is // undesirable. We should only do it for defs that are known to the MC // desc like flags. Truly implicit defs should get 1 cycle latency. - return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, DefMI); + return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, *DefMI); } unsigned @@ -254,24 +254,23 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI, // Allow subtargets to compute Bundle latencies outside the machine model. if (hasInstrItineraries() || MI->isBundle() || (!hasInstrSchedModel() && !UseDefaultDefLatency)) - return TII->getInstrLatency(&InstrItins, MI); + return TII->getInstrLatency(&InstrItins, *MI); if (hasInstrSchedModel()) { const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); if (SCDesc->isValid()) return computeInstrLatency(*SCDesc); } - return TII->defaultDefLatency(SchedModel, MI); + return TII->defaultDefLatency(SchedModel, *MI); } unsigned TargetSchedModel:: computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const { - if (SchedModel.MicroOpBufferSize <= 1) + if (!SchedModel.isOutOfOrder()) return 1; - // MicroOpBufferSize > 1 indicates an out-of-order processor that can dispatch - // WAW dependencies in the same cycle. + // Out-of-order processor can dispatch WAW dependencies in the same cycle. // Treat predication as a data dependency for out-of-order cpus. In-order // cpus do not need to treat predicated writes specially. 
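Several of the call sites above fall back to TargetInstrInfo::defaultDefLatency() when neither itineraries nor a per-operand scheduling model are available. A standalone C++ sketch of that fallback chain follows; the struct fields and the example latency values are assumptions for illustration, not LLVM's types.

#include <cstdio>

struct SchedModelParams {
  unsigned LoadLatency = 4;   // illustrative value
  unsigned HighLatency = 10;  // illustrative value
};

struct InstrProps {
  bool Transient;       // e.g. COPY-like: emits no machine cycles
  bool MayLoad;
  bool HighLatencyOpc;  // opcode the target marks as high latency
};

// Mirrors the decision order in defaultDefLatency(): transient first,
// then loads, then high-latency opcodes, then a 1-cycle default.
static unsigned defaultDefLatency(const SchedModelParams &SM, InstrProps MI) {
  if (MI.Transient) return 0;
  if (MI.MayLoad) return SM.LoadLatency;
  if (MI.HighLatencyOpc) return SM.HighLatency;
  return 1;
}

int main() {
  SchedModelParams SM;
  std::printf("copy: %u, load: %u, div: %u, add: %u\n",
              defaultDefLatency(SM, {true, false, false}),
              defaultDefLatency(SM, {false, true, false}),
              defaultDefLatency(SM, {false, false, true}),
              defaultDefLatency(SM, {false, false, false}));
}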
@@ -282,7 +281,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); const MachineFunction &MF = *DefMI->getParent()->getParent(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI)) + if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI)) return computeInstrLatency(DefMI); // If we have a per operand scheduling model, check if this def is writing diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index c6bae24..8feb18b 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -27,11 +27,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -40,6 +38,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" @@ -50,6 +49,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "twoaddrinstr" @@ -156,6 +156,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AAResultsWrapperPass>(); + AU.addUsedIfAvailable<LiveVariables>(); AU.addPreserved<LiveVariables>(); AU.addPreserved<SlotIndexes>(); AU.addPreserved<LiveIntervals>(); @@ -245,7 +246,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, // appropriate location, we can try to sink the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI == OldPos || KillMI->isTerminator()) + MachineBasicBlock::iterator(KillMI) == OldPos || KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the @@ -259,16 +260,15 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, ++KillPos; unsigned NumVisited = 0; - for (MachineBasicBlock::iterator I = std::next(OldPos); I != KillPos; ++I) { - MachineInstr *OtherMI = I; + for (MachineInstr &OtherMI : llvm::make_range(std::next(OldPos), KillPos)) { // DBG_VALUE cannot be counted against the limit. - if (OtherMI->isDebugValue()) + if (OtherMI.isDebugValue()) continue; if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. 
return false; ++NumVisited; - for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = OtherMI->getOperand(i); + for (unsigned i = 0, e = OtherMI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = OtherMI.getOperand(i); if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -277,8 +277,8 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, if (DefReg == MOReg) return false; - if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) { - if (OtherMI == KillMI && MOReg == SavedReg) + if (MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS))) { + if (&OtherMI == KillMI && MOReg == SavedReg) // Save the operand that kills the register. We want to unset the kill // marker if we can sink MI past it. KillMO = &MO; @@ -297,7 +297,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, KillMO->setIsKill(true); if (LV) - LV->replaceKillInstruction(SavedReg, KillMI, MI); + LV->replaceKillInstruction(SavedReg, *KillMI, *MI); } // Move instruction to its destination. @@ -305,7 +305,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, MBB->insert(KillPos, MI); if (LIS) - LIS->handleMove(MI); + LIS->handleMove(*MI); ++Num3AddrSunk; return true; @@ -400,7 +400,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS) { if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && - !LIS->isNotInMIMap(MI)) { + !LIS->isNotInMIMap(*MI)) { // FIXME: Sometimes tryInstructionTransform() will add instructions and // test whether they can be folded before keeping them. In this case it // sets a kill before recursively calling tryInstructionTransform() again. @@ -413,7 +413,7 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, if (!LI.hasAtLeastOneValue()) return false; - SlotIndex useIdx = LIS->getInstructionIndex(MI); + SlotIndex useIdx = LIS->getInstructionIndex(*MI); LiveInterval::const_iterator I = LI.find(useIdx); assert(I != LI.end() && "Reg must be live-in to use."); return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx); @@ -539,6 +539,15 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { return TRI->regsOverlap(RegA, RegB); } +// Returns true if Reg is equal to, or aliases, at least one register in Set. +static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg, + const TargetRegisterInfo *TRI) { + for (unsigned R : Set) + if (TRI->regsOverlap(R, Reg)) + return true; + + return false; +} /// Return true if it's potentially profitable to commute the two-address /// instruction that's being processed. @@ -647,7 +656,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, unsigned Dist) { unsigned RegC = MI->getOperand(RegCIdx).getReg(); DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); - MachineInstr *NewMI = TII->commuteInstruction(MI, false, RegBIdx, RegCIdx); + MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx); if (NewMI == nullptr) { DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); @@ -695,7 +704,7 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, unsigned Dist) { // FIXME: Why does convertToThreeAddress() need an iterator reference? 
MachineFunction::iterator MFI = MBB->getIterator(); - MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV); + MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV); assert(MBB->getIterator() == MFI && "convertToThreeAddress changed iterator reference"); if (!NewMI) @@ -706,7 +715,7 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, bool Sunk = false; if (LIS) - LIS->ReplaceMachineInstrInMaps(mi, NewMI); + LIS->ReplaceMachineInstrInMaps(*mi, *NewMI); if (NewMI->findRegisterUseOperand(RegB, false, TRI)) // FIXME: Temporary workaround. If the new instruction doesn't @@ -808,7 +817,6 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { } Processed.insert(MI); - return; } /// If there is one more local instruction that reads 'Reg' and it kills 'Reg', @@ -862,13 +870,13 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, if (!MI->isSafeToMove(AA, SeenStore)) return false; - if (TII->getInstrLatency(InstrItins, MI) > 1) + if (TII->getInstrLatency(InstrItins, *MI) > 1) // FIXME: Needs more sophisticated heuristics. return false; - SmallSet<unsigned, 2> Uses; - SmallSet<unsigned, 2> Kills; - SmallSet<unsigned, 2> Defs; + SmallVector<unsigned, 2> Uses; + SmallVector<unsigned, 2> Kills; + SmallVector<unsigned, 2> Defs; for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; @@ -876,12 +884,12 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, if (!MOReg) continue; if (MO.isDef()) - Defs.insert(MOReg); + Defs.push_back(MOReg); else { - Uses.insert(MOReg); + Uses.push_back(MOReg); if (MOReg != Reg && (MO.isKill() || (LIS && isPlainlyKilled(MI, MOReg, LIS)))) - Kills.insert(MOReg); + Kills.push_back(MOReg); } } @@ -890,8 +898,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator AfterMI = std::next(Begin); MachineBasicBlock::iterator End = AfterMI; - while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) { - Defs.insert(End->getOperand(0).getReg()); + while (End->isCopy() && + regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) { + Defs.push_back(End->getOperand(0).getReg()); ++End; } @@ -899,47 +908,46 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, unsigned NumVisited = 0; MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; - for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) { - MachineInstr *OtherMI = I; + for (MachineInstr &OtherMI : llvm::make_range(End, KillPos)) { // DBG_VALUE cannot be counted against the limit. - if (OtherMI->isDebugValue()) + if (OtherMI.isDebugValue()) continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. return false; ++NumVisited; - if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || - OtherMI->isBranch() || OtherMI->isTerminator()) + if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() || + OtherMI.isBranch() || OtherMI.isTerminator()) // Don't move past calls, etc. return false; - for (const MachineOperand &MO : OtherMI->operands()) { + for (const MachineOperand &MO : OtherMI.operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); if (!MOReg) continue; if (MO.isDef()) { - if (Uses.count(MOReg)) + if (regOverlapsSet(Uses, MOReg, TRI)) // Physical register use would be clobbered. return false; - if (!MO.isDead() && Defs.count(MOReg)) + if (!MO.isDead() && regOverlapsSet(Defs, MOReg, TRI)) // May clobber a physical register def. // FIXME: This may be too conservative. It's ok if the instruction // is sunk completely below the use. 
return false; } else { - if (Defs.count(MOReg)) + if (regOverlapsSet(Defs, MOReg, TRI)) return false; - bool isKill = MO.isKill() || - (LIS && isPlainlyKilled(OtherMI, MOReg, LIS)); - if (MOReg != Reg && - ((isKill && Uses.count(MOReg)) || Kills.count(MOReg))) + bool isKill = + MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS)); + if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) || + regOverlapsSet(Kills, MOReg, TRI))) // Don't want to extend other live ranges and update kills. return false; if (MOReg == Reg && !isKill) // We can't schedule across a use of the register in question. return false; // Ensure that if this is the register in question, it's the kill we expect. - assert((MOReg != Reg || OtherMI == KillMI) && + assert((MOReg != Reg || &OtherMI == KillMI) && "Found multiple kills of a register in a basic block"); } } @@ -955,10 +963,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // We have to move the copies first so that the MBB is still well-formed // when calling handleMove(). for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) { - MachineInstr *CopyMI = MBBI; - ++MBBI; + auto CopyMI = MBBI++; MBB->splice(InsertPos, MBB, CopyMI); - LIS->handleMove(CopyMI); + LIS->handleMove(*CopyMI); InsertPos = CopyMI; } End = std::next(MachineBasicBlock::iterator(MI)); @@ -970,10 +977,10 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Update live variables if (LIS) { - LIS->handleMove(MI); + LIS->handleMove(*MI); } else { - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); + LV->removeVirtualRegisterKilled(Reg, *KillMI); + LV->addVirtualRegisterKilled(Reg, *MI); } DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); @@ -994,7 +1001,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, return true; // Below MI unsigned DefDist = DDI->second; assert(Dist > DefDist && "Visited def already?"); - if (TII->getInstrLatency(InstrItins, &DefMI) > (Dist - DefDist)) + if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist)) return true; } return false; } @@ -1074,21 +1081,20 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Check if the reschedule will not break dependencies. unsigned NumVisited = 0; - MachineBasicBlock::iterator KillPos = KillMI; - for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) { - MachineInstr *OtherMI = I; + for (MachineInstr &OtherMI : + llvm::make_range(mi, MachineBasicBlock::iterator(KillMI))) { // DBG_VALUE cannot be counted against the limit. - if (OtherMI->isDebugValue()) + if (OtherMI.isDebugValue()) continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. return false; ++NumVisited; - if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || - OtherMI->isBranch() || OtherMI->isTerminator()) + if (OtherMI.hasUnmodeledSideEffects() || OtherMI.isCall() || + OtherMI.isBranch() || OtherMI.isTerminator()) // Don't move past calls, etc. return false; SmallVector<unsigned, 2> OtherDefs; - for (const MachineOperand &MO : OtherMI->operands()) { + for (const MachineOperand &MO : OtherMI.operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -1102,8 +1108,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, if (Kills.count(MOReg)) // Don't want to extend other live ranges and update kills. 
return false; - if (OtherMI != MI && MOReg == Reg && - !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS)))) + if (&OtherMI != MI && MOReg == Reg && + !(MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS)))) // We can't schedule across a use of the register in question. return false; } else { @@ -1138,10 +1144,10 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Update live variables if (LIS) { - LIS->handleMove(KillMI); + LIS->handleMove(*KillMI); } else { - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); + LV->removeVirtualRegisterKilled(Reg, *KillMI); + LV->addVirtualRegisterKilled(Reg, *MI); } DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); @@ -1175,7 +1181,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, // other commutable operands and does not change the values of passed // variables. if (OtherOpIdx == BaseOpIdx || - !TII->findCommutedOpIndices(MI, BaseOpIdx, OtherOpIdx)) + !TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx)) continue; unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); @@ -1308,9 +1314,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; - if (!TII->unfoldMemoryOperand(*MF, &MI, Reg, - /*UnfoldLoad=*/true,/*UnfoldStore=*/false, - NewMIs)) { + if (!TII->unfoldMemoryOperand(*MF, MI, Reg, + /*UnfoldLoad=*/true, + /*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); return false; } @@ -1347,25 +1353,25 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (MO.isUse()) { if (MO.isKill()) { if (NewMIs[0]->killsRegister(MO.getReg())) - LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]); + LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[0]); else { assert(NewMIs[1]->killsRegister(MO.getReg()) && "Kill missing after load unfold!"); - LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]); + LV->replaceKillInstruction(MO.getReg(), MI, *NewMIs[1]); } } - } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) { + } else if (LV->removeVirtualRegisterDead(MO.getReg(), MI)) { if (NewMIs[1]->registerDefIsDead(MO.getReg())) - LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); + LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[1]); else { assert(NewMIs[0]->registerDefIsDead(MO.getReg()) && "Dead flag missing after load unfold!"); - LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]); + LV->addVirtualRegisterDead(MO.getReg(), *NewMIs[0]); } } } } - LV->addVirtualRegisterKilled(Reg, NewMIs[1]); + LV->addVirtualRegisterKilled(Reg, *NewMIs[1]); } SmallVector<unsigned, 4> OrigRegs; @@ -1518,17 +1524,17 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Update DistanceMap. 
MachineBasicBlock::iterator PrevMI = MI; --PrevMI; - DistanceMap.insert(std::make_pair(PrevMI, Dist)); + DistanceMap.insert(std::make_pair(&*PrevMI, Dist)); DistanceMap[MI] = ++Dist; if (LIS) { - LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot(); + LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot(); if (TargetRegisterInfo::isVirtualRegister(RegA)) { LiveInterval &LI = LIS->getInterval(RegA); VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); SlotIndex endIdx = - LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber); + LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber); LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI)); } } @@ -1574,16 +1580,16 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } // Update live variables for regB. - if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) { + if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(*MI)) { MachineBasicBlock::iterator PrevMI = MI; --PrevMI; - LV->addVirtualRegisterKilled(RegB, PrevMI); + LV->addVirtualRegisterKilled(RegB, *PrevMI); } // Update LiveIntervals. if (LIS) { LiveInterval &LI = LIS->getInterval(RegB); - SlotIndex MIIdx = LIS->getInstructionIndex(MI); + SlotIndex MIIdx = LIS->getInstructionIndex(*MI); LiveInterval::const_iterator I = LI.find(MIIdx); assert(I != LI.end() && "RegB must be live-in to use."); @@ -1650,13 +1656,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { if (mi->isRegSequence()) eliminateRegSequence(mi); - DistanceMap.insert(std::make_pair(mi, ++Dist)); + DistanceMap.insert(std::make_pair(&*mi, ++Dist)); processCopy(&*mi); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. - if (!collectTiedOperands(mi, TiedOperands)) { + if (!collectTiedOperands(&*mi, TiedOperands)) { mi = nmi; continue; } @@ -1689,7 +1695,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // Now iterate over the information collected above. 
for (auto &TO : TiedOperands) { - processTiedPairs(mi, TO.second, Dist); + processTiedPairs(&*mi, TO.second, Dist); DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } @@ -1733,27 +1739,27 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { /// void TwoAddressInstructionPass:: eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { - MachineInstr *MI = MBBI; - unsigned DstReg = MI->getOperand(0).getReg(); - if (MI->getOperand(0).getSubReg() || + MachineInstr &MI = *MBBI; + unsigned DstReg = MI.getOperand(0).getReg(); + if (MI.getOperand(0).getSubReg() || TargetRegisterInfo::isPhysicalRegister(DstReg) || - !(MI->getNumOperands() & 1)) { - DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + !(MI.getNumOperands() & 1)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI); llvm_unreachable(nullptr); } SmallVector<unsigned, 4> OrigRegs; if (LIS) { - OrigRegs.push_back(MI->getOperand(0).getReg()); - for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) - OrigRegs.push_back(MI->getOperand(i).getReg()); + OrigRegs.push_back(MI.getOperand(0).getReg()); + for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) + OrigRegs.push_back(MI.getOperand(i).getReg()); } bool DefEmitted = false; - for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { - MachineOperand &UseMO = MI->getOperand(i); + for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) { + MachineOperand &UseMO = MI.getOperand(i); unsigned SrcReg = UseMO.getReg(); - unsigned SubIdx = MI->getOperand(i+1).getImm(); + unsigned SubIdx = MI.getOperand(i+1).getImm(); // Nothing needs to be inserted for <undef> operands. if (UseMO.isUndef()) continue; @@ -1763,18 +1769,18 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { bool isKill = UseMO.isKill(); if (isKill) for (unsigned j = i + 2; j < e; j += 2) - if (MI->getOperand(j).getReg() == SrcReg) { - MI->getOperand(j).setIsKill(); + if (MI.getOperand(j).getReg() == SrcReg) { + MI.getOperand(j).setIsKill(); UseMO.setIsKill(false); isKill = false; break; } // Insert the sub-register copy. - MachineInstr *CopyMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + MachineInstr *CopyMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define, SubIdx) - .addOperand(UseMO); + .addReg(DstReg, RegState::Define, SubIdx) + .addOperand(UseMO); // The first def needs an <undef> flag because there is no live register // before it. @@ -1787,7 +1793,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { // Update LiveVariables' kill info. if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) - LV->replaceKillInstruction(SrcReg, MI, CopyMI); + LV->replaceKillInstruction(SrcReg, MI, *CopyMI); DEBUG(dbgs() << "Inserted: " << *CopyMI); } @@ -1796,13 +1802,13 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { std::next(MachineBasicBlock::iterator(MI)); if (!DefEmitted) { - DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); - MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); - for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) - MI->RemoveOperand(j); + DEBUG(dbgs() << "Turned: " << MI << " into an IMPLICIT_DEF"); + MI.setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j) + MI.RemoveOperand(j); } else { - DEBUG(dbgs() << "Eliminated: " << *MI); - MI->eraseFromParent(); + DEBUG(dbgs() << "Eliminated: " << MI); + MI.eraseFromParent(); } // Update LiveIntervals. 
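The SmallSet-to-SmallVector switch in rescheduleMIBelowKill() above pairs with the new regOverlapsSet() helper: a plain count() lookup compares raw register numbers, so a def recorded under a super-register would be missed when querying one of its sub-registers. A rough sketch of the distinction, using illustrative x86 register names (actual enum values come from the target backend):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Overlap-aware membership test: true when Reg equals or aliases any
// recorded register (e.g. a query for AL hits a recorded def of EAX,
// which an equality-based set lookup would miss).
static bool mayClobber(const SmallVectorImpl<unsigned> &Defs, unsigned Reg,
                       const TargetRegisterInfo *TRI) {
  for (unsigned R : Defs)
    if (TRI->regsOverlap(R, Reg)) // identity and sub-/super-register aliases
      return true;
  return false;
}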
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 8c9631e..501e01c 100644 --- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -20,7 +20,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/UnreachableBlockElim.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineDominators.h" @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Dominators.h" @@ -38,29 +39,7 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; -namespace { - class UnreachableBlockElim : public FunctionPass { - bool runOnFunction(Function &F) override; - public: - static char ID; // Pass identification, replacement for typeid - UnreachableBlockElim() : FunctionPass(ID) { - initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved<DominatorTreeWrapperPass>(); - } - }; -} -char UnreachableBlockElim::ID = 0; -INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim", - "Remove unreachable blocks from the CFG", false, false) - -FunctionPass *llvm::createUnreachableBlockEliminationPass() { - return new UnreachableBlockElim(); -} - -bool UnreachableBlockElim::runOnFunction(Function &F) { +static bool eliminateUnreachableBlock(Function &F) { SmallPtrSet<BasicBlock*, 8> Reachable; // Mark all reachable blocks. 
@@ -91,6 +70,41 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { return !DeadBlocks.empty(); } +namespace { +class UnreachableBlockElimLegacyPass : public FunctionPass { + bool runOnFunction(Function &F) override { + return eliminateUnreachableBlock(F); + } + +public: + static char ID; // Pass identification, replacement for typeid + UnreachableBlockElimLegacyPass() : FunctionPass(ID) { + initializeUnreachableBlockElimLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + } +}; +} +char UnreachableBlockElimLegacyPass::ID = 0; +INITIALIZE_PASS(UnreachableBlockElimLegacyPass, "unreachableblockelim", + "Remove unreachable blocks from the CFG", false, false) + +FunctionPass *llvm::createUnreachableBlockEliminationPass() { + return new UnreachableBlockElimLegacyPass(); +} + +PreservedAnalyses UnreachableBlockElimPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = eliminateUnreachableBlock(F); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<DominatorTreeAnalysis>(); + return PA; +} namespace { class UnreachableMachineBlockElim : public MachineFunctionPass { @@ -184,9 +198,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { unsigned Input = phi->getOperand(1).getReg(); unsigned Output = phi->getOperand(0).getReg(); - MachineInstr* temp = phi; - ++phi; - temp->eraseFromParent(); + phi++->eraseFromParent(); ModifiedPHI = true; if (Input != Output) { diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp index bf1c0dc..8a3a032 100644 --- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/VirtRegMap.h" #include "LiveDebugVariables.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -29,7 +28,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -84,7 +82,7 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) { unsigned Hint = MRI->getSimpleHint(VirtReg); if (!Hint) - return 0; + return false; if (TargetRegisterInfo::isVirtualRegister(Hint)) Hint = getPhys(Hint); return getPhys(VirtReg) == Hint; @@ -139,7 +137,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VirtRegMap::dump() const { +LLVM_DUMP_METHOD void VirtRegMap::dump() const { print(dbgs()); } #endif @@ -168,6 +166,7 @@ class VirtRegRewriter : public MachineFunctionPass { void addMBBLiveIns(); bool readsUndefSubreg(const MachineOperand &MO) const; void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const; + void handleIdentityCopy(MachineInstr &MI) const; public: static char ID; @@ -176,6 +175,10 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction&) override; + MachineFunctionProperties getSetProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::AllVRegsAllocated); + } }; } // end anonymous 
namespace @@ -329,7 +332,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { unsigned Reg = MO.getReg(); const LiveInterval &LI = LIS->getInterval(Reg); const MachineInstr &MI = *MO.getParent(); - SlotIndex BaseIndex = LIS->getInstructionIndex(&MI); + SlotIndex BaseIndex = LIS->getInstructionIndex(MI); // This code is only meant to handle reading undefined subregisters which // we couldn't properly detect before. assert(LI.liveAt(BaseIndex) && @@ -344,6 +347,30 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { return true; } +void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const { + if (!MI.isIdentityCopy()) + return; + DEBUG(dbgs() << "Identity copy: " << MI); + ++NumIdCopies; + + // Copies like: + // %R0 = COPY %R0<undef> + // %AL = COPY %AL, %EAX<imp-def> + // give us additional liveness information: The target (super-)register + // must not be valid before this point. Replace the COPY with a KILL + // instruction to maintain this information. + if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) { + MI.setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << " replace by: " << MI); + return; + } + + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + MI.eraseFromParent(); + DEBUG(dbgs() << " deleted.\n"); +} + void VirtRegRewriter::rewrite() { bool NoSubRegLiveness = !MRI->subRegLivenessEnabled(); SmallVector<unsigned, 8> SuperDeads; @@ -433,16 +460,8 @@ void VirtRegRewriter::rewrite() { DEBUG(dbgs() << "> " << *MI); - // Finally, remove any identity copies. - if (MI->isIdentityCopy()) { - ++NumIdCopies; - DEBUG(dbgs() << "Deleting identity copy.\n"); - if (Indexes) - Indexes->removeMachineInstrFromMaps(MI); - // It's safe to erase MI because MII has already been incremented. - MI->eraseFromParent(); - } + // We can remove identity copies right now. + handleIdentityCopy(*MI); } } } - diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp index 14ec911..041fb7b 100644 --- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -254,9 +254,11 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo, FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow; for (const User *U : CatchPad->users()) { const auto *UserI = cast<Instruction>(U); - if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) - if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest()) + if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) { + BasicBlock *UnwindDest = InnerCatchSwitch->getUnwindDest(); + if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest()) calculateCXXStateNumbers(FuncInfo, UserI, CatchLow); + } if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) { BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad); // If a nested cleanup pad reports a null unwind destination and the @@ -361,9 +363,11 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo, // outside the __try. 
for (const User *U : CatchPad->users()) { const auto *UserI = cast<Instruction>(U); - if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) - if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest()) + if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) { + BasicBlock *UnwindDest = InnerCatchSwitch->getUnwindDest(); + if (!UnwindDest || UnwindDest == CatchSwitch->getUnwindDest()) calculateSEHStateNumbers(FuncInfo, UserI, ParentState); + } if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) { BasicBlock *UnwindDest = getCleanupRetUnwindDest(InnerCleanupPad); // If a nested cleanup pad reports a null unwind destination and the @@ -783,7 +787,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) { // Loop over all instructions, fixing each one as we find it... for (Instruction &I : *BB) RemapInstruction(&I, VMap, - RF_IgnoreMissingEntries | RF_NoModuleLevelChanges); + RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); // Catchrets targeting cloned blocks need to be updated separately from // the loop above because they are not in the current funclet. @@ -795,7 +799,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) { FixupCatchrets.clear(); for (BasicBlock *Pred : predecessors(OldBlock)) if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator())) - if (CatchRet->getParentPad() == FuncletToken) + if (CatchRet->getCatchSwitchParentPad() == FuncletToken) FixupCatchrets.push_back(CatchRet); for (CatchReturnInst *CatchRet : FixupCatchrets) @@ -810,7 +814,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) { bool EdgeTargetsFunclet; if (auto *CRI = dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) { - EdgeTargetsFunclet = (CRI->getParentPad() == FuncletToken); + EdgeTargetsFunclet = (CRI->getCatchSwitchParentPad() == FuncletToken); } else { ColorVector &IncomingColors = BlockColors[IncomingBlock]; assert(!IncomingColors.empty() && "Block not colored!"); @@ -944,10 +948,11 @@ void WinEHPrepare::removeImplausibleInstructions(Function &F) { if (FuncletBundleOperand == FuncletPad) continue; - // Skip call sites which are nounwind intrinsics. + // Skip call sites which are nounwind intrinsics or inline asm. auto *CalledFn = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); - if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) + if (CalledFn && ((CalledFn->isIntrinsic() && CS.doesNotThrow()) || + CS.isInlineAsm())) continue; // This call site was not part of this funclet, remove it. diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp new file mode 100644 index 0000000..1f95708 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -0,0 +1,96 @@ +//===-- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a MachineFunctionPass that inserts the appropriate +// XRay instrumentation instructions. We look for XRay-specific attributes +// on the function to determine whether we should insert the replacement +// operations. 
+// +//===---------------------------------------------------------------------===// + +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { +struct XRayInstrumentation : public MachineFunctionPass { + static char ID; + + XRayInstrumentation() : MachineFunctionPass(ID) { + initializeXRayInstrumentationPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} + +bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { + auto &F = *MF.getFunction(); + auto InstrAttr = F.getFnAttribute("function-instrument"); + bool AlwaysInstrument = !InstrAttr.hasAttribute(Attribute::None) && + InstrAttr.isStringAttribute() && + InstrAttr.getValueAsString() == "xray-always"; + Attribute Attr = F.getFnAttribute("xray-instruction-threshold"); + unsigned XRayThreshold = 0; + if (!AlwaysInstrument) { + if (Attr.hasAttribute(Attribute::None) || !Attr.isStringAttribute()) + return false; // XRay threshold attribute not found. + if (Attr.getValueAsString().getAsInteger(10, XRayThreshold)) + return false; // Invalid value for threshold. + if (F.size() < XRayThreshold) + return false; // Function is too small. + } + + // FIXME: Do the loop triviality analysis here or in an earlier pass. + + // First, insert a PATCHABLE_FUNCTION_ENTER as the first instruction of the + // MachineFunction. + auto &FirstMBB = *MF.begin(); + auto &FirstMI = *FirstMBB.begin(); + auto *TII = MF.getSubtarget().getInstrInfo(); + BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), + TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); + + // Then we look for *all* terminators and returns, then replace those with + // PATCHABLE_RET instructions. + SmallVector<MachineInstr *, 4> Terminators; + for (auto &MBB : MF) { + for (auto &T : MBB.terminators()) { + // FIXME: Handle tail calls here too? + if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) { + // Replace return instructions with: + // PATCHABLE_RET <Opcode>, <Operand>... + auto MIB = BuildMI(MBB, T, T.getDebugLoc(), + TII->get(TargetOpcode::PATCHABLE_RET)) + .addImm(T.getOpcode()); + for (auto &MO : T.operands()) + MIB.addOperand(MO); + Terminators.push_back(&T); + break; + } + } + } + + for (auto &I : Terminators) + I->eraseFromParent(); + + return true; +} + +char XRayInstrumentation::ID = 0; +char &llvm::XRayInstrumentationID = XRayInstrumentation::ID; +INITIALIZE_PASS(XRayInstrumentation, "xray-instrumentation", "Insert XRay ops", + false, false) diff --git a/contrib/llvm/lib/CodeGen/module.modulemap b/contrib/llvm/lib/CodeGen/module.modulemap deleted file mode 100644 index d4f68bc..0000000 --- a/contrib/llvm/lib/CodeGen/module.modulemap +++ /dev/null @@ -1 +0,0 @@ -module CodeGen { requires cplusplus umbrella "." module * { export * } }
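For context on the new XRayInstrumentation pass, a hedged sketch of how a frontend could opt a function in; the helper below is hypothetical, while the attribute names are the ones the pass parses in runOnMachineFunction():

#include <string>
#include "llvm/IR/Function.h"
using namespace llvm;

// "function-instrument"="xray-always" forces instrumentation; otherwise the
// pass compares the function's basic-block count against the value of
// "xray-instruction-threshold" before patching entries and returns.
static void requestXRay(Function &F, bool Always, unsigned Threshold) {
  if (Always)
    F.addFnAttr("function-instrument", "xray-always");
  else
    F.addFnAttr("xray-instruction-threshold", std::to_string(Threshold));
}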